/*
 * DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * DSP utils
 */

#include "bit_depth_template.c"
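/*
 * Note: this file is a bit-depth template.  bit_depth_template.c provides the
 * pixel/pixel2/pixel4 types, the AV_RNxP/AV_WNxP load/store helpers and the
 * FUNC()/FUNCC() name-mangling macros that append the bit depth (and a "_c"
 * suffix for FUNCC) to each function name.  As suggested by the *_8_c /
 * *_9_c / *_10_c aliases near the end of this file, with BIT_DEPTH == 8 an
 * illustrative expansion looks like:
 *
 *     FUNC(copy_block4)   ->  copy_block4_8
 *     FUNCC(put_pixels8)  ->  put_pixels8_8_c
 *
 * (sketch only; the exact macro definitions live in bit_depth_template.c)
 */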
static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN2P(dst , AV_RN2P(src ));
        dst+=dstStride;
        src+=srcStride;
    }
}

static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN4P(dst , AV_RN4P(src ));
        dst+=dstStride;
        src+=srcStride;
    }
}

static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN4P(dst , AV_RN4P(src ));
        AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel)));
        dst+=dstStride;
        src+=srcStride;
    }
}

static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN4P(dst , AV_RN4P(src ));
        AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel)));
        AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
        AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
        dst+=dstStride;
        src+=srcStride;
    }
}
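/*
 * The copy_blockN() helpers copy an N-pixel-wide, h-row block using unaligned
 * 2/4-pixel loads and stores; strides are in bytes while block widths are in
 * pixels.  Illustrative use on hypothetical buffers:
 *
 *     FUNC(copy_block8)(dst, src, dst_linesize, src_linesize, 8);
 *
 * copies an 8x8 block one row at a time with two AV_RN4P/AV_WN4P moves per row.
 */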
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int h, int sides)
{
    pixel *buf = (pixel*)_buf;
    int wrap = _wrap / sizeof(pixel);
    pixel *ptr, *last_line;
    int i;

    /* left and right */
    ptr = buf;
    for(i=0;i<height;i++) {
#if BIT_DEPTH > 8
        int j;
        for (j = 0; j < w; j++) {
            ptr[j-w] = ptr[0];
            ptr[j+width] = ptr[width-1];
        }
#else
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
#endif
        ptr += wrap;
    }

    /* top and bottom + corners */
    buf -= w;
    last_line = buf + (height - 1) * wrap;
    if (sides & EDGE_TOP)
        for(i = 0; i < h; i++)
            memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
    if (sides & EDGE_BOTTOM)
        for (i = 0; i < h; i++)
            memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
}
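/*
 * Edge extension works in two passes: the left/right loop replicates the
 * first and last pixel of every row 'w' times outwards, then the top/bottom
 * memcpy()s duplicate the (already widened) first and last rows 'h' times
 * above and below, which also fills the four corners.  A hypothetical call
 * padding a luma plane by 16 pixels on all sides might look like:
 *
 *     FUNCC(draw_edges)(plane, linesize, width, height, 16, 16,
 *                       EDGE_TOP | EDGE_BOTTOM);
 */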
#define DCTELEM_FUNCS(dctcoef, suffix) \
static void FUNCC(get_pixels ## suffix)(DCTELEM *restrict _block, \
                                        const uint8_t *_pixels, \
                                        int line_size) \
{ \
    const pixel *pixels = (const pixel *) _pixels; \
    dctcoef *restrict block = (dctcoef *) _block; \
    int i; \
 \
    /* read the pixels */ \
    for(i=0;i<8;i++) { \
        block[0] = pixels[0]; \
        block[1] = pixels[1]; \
        block[2] = pixels[2]; \
        block[3] = pixels[3]; \
        block[4] = pixels[4]; \
        block[5] = pixels[5]; \
        block[6] = pixels[6]; \
        block[7] = pixels[7]; \
        pixels += line_size / sizeof(pixel); \
        block += 8; \
    } \
} \
 \
static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels, \
                                         DCTELEM *_block, \
                                         int line_size) \
{ \
    int i; \
    pixel *restrict pixels = (pixel *restrict)_pixels; \
    dctcoef *block = (dctcoef*)_block; \
    line_size /= sizeof(pixel); \
 \
    for(i=0;i<8;i++) { \
        pixels[0] += block[0]; \
        pixels[1] += block[1]; \
        pixels[2] += block[2]; \
        pixels[3] += block[3]; \
        pixels[4] += block[4]; \
        pixels[5] += block[5]; \
        pixels[6] += block[6]; \
        pixels[7] += block[7]; \
        pixels += line_size; \
        block += 8; \
    } \
} \
 \
static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels, \
                                         DCTELEM *_block, \
                                         int line_size) \
{ \
    int i; \
    pixel *restrict pixels = (pixel *restrict)_pixels; \
    dctcoef *block = (dctcoef*)_block; \
    line_size /= sizeof(pixel); \
 \
    for(i=0;i<4;i++) { \
        pixels[0] += block[0]; \
        pixels[1] += block[1]; \
        pixels[2] += block[2]; \
        pixels[3] += block[3]; \
        pixels += line_size; \
        block += 4; \
    } \
} \
 \
static void FUNCC(clear_block ## suffix)(DCTELEM *block) \
{ \
    memset(block, 0, sizeof(dctcoef)*64); \
} \
 \
/** \
 * memset(blocks, 0, sizeof(DCTELEM)*6*64) \
 */ \
static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks) \
{ \
    memset(blocks, 0, sizeof(dctcoef)*6*64); \
}

DCTELEM_FUNCS(DCTELEM, _16)
#if BIT_DEPTH > 8
DCTELEM_FUNCS(dctcoef, _32)
#endif
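/*
 * DCTELEM_FUNCS is instantiated twice: with 16-bit coefficients (suffix _16)
 * for every bit depth, and additionally with the wider dctcoef type (suffix
 * _32) when BIT_DEPTH > 8, where the coefficients may exceed the 16-bit range.
 */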
#define PIXOP2(OPNAME, OP) \
static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel2*)(block )), AV_RN2P(pixels ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void FUNCC(OPNAME ## _pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel4*)(block )), AV_RN4P(pixels ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void FUNCC(OPNAME ## _pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel4*)(block )), AV_RN4P(pixels ));\
        OP(*((pixel4*)(block+4*sizeof(pixel))), AV_RN4P(pixels+4*sizeof(pixel)));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNCC(OPNAME ## _pixels8)(block, pixels, line_size, h);\
}\
\
static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                      int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1 ]);\
        b= AV_RN4P(&src2[i*src_stride2 ]);\
        OP(*((pixel4*)&dst[i*dst_stride ]), no_rnd_avg_pixel4(a, b));\
        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
    }\
}\
\
static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1 ]);\
        b= AV_RN4P(&src2[i*src_stride2 ]);\
        OP(*((pixel4*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), rnd_avg_pixel4(a, b));\
    }\
}\
\
static inline void FUNC(OPNAME ## _pixels4_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1 ]);\
        b= AV_RN4P(&src2[i*src_stride2 ]);\
        OP(*((pixel4*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
    }\
}\
\
static inline void FUNC(OPNAME ## _pixels2_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN2P(&src1[i*src_stride1 ]);\
        b= AV_RN2P(&src2[i*src_stride2 ]);\
        OP(*((pixel2*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
    }\
}\
\
static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    FUNC(OPNAME ## _pixels8_l2)(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
    FUNC(OPNAME ## _pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                       int src_stride1, int src_stride2, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                                               int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    /* FIXME HIGH BIT DEPTH */\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                                                      int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    /* FIXME HIGH BIT DEPTH*/\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                                                int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    FUNC(OPNAME ## _pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                                                       int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, int line_size, int h)\
{\
    int i, a0, b0, a1, b1;\
    pixel *block = (pixel*)_block;\
    const pixel *pixels = (const pixel*)_pixels;\
    line_size /= sizeof(pixel);\
    a0= pixels[0];\
    b0= pixels[1] + 2;\
    a0 += b0;\
    b0 += pixels[2];\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        a1= pixels[0];\
        b1= pixels[1];\
        a1 += b1;\
        b1 += pixels[2];\
\
        block[0]= (a1+a0)>>2; /* FIXME non put */\
        block[1]= (b1+b0)>>2;\
\
        pixels+=line_size;\
        block +=line_size;\
\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        block[0]= (a1+a0)>>2;\
        block[1]= (b1+b0)>>2;\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int i;\
    const uint32_t a= AV_RN32(pixels );\
    const uint32_t b= AV_RN32(pixels+1);\
    uint32_t l0= (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
    uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
               + ((b&0xFCFCFCFCUL)>>2);\
    uint32_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint32_t a= AV_RN32(pixels );\
        uint32_t b= AV_RN32(pixels+1);\
        l1= (a&0x03030303UL)\
          + (b&0x03030303UL);\
        h1= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN32(pixels );\
        b= AV_RN32(pixels+1);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels );\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels );\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels );\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels );\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\

#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
#define op_put(a, b) a = b

PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
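/*
 * The 0x03030303/0xFCFCFCFC masks used above implement a SIMD-within-a-register
 * average of four packed 8-bit pixels: each byte is split into its low 2 bits
 * (l0/l1) and its high 6 bits pre-shifted down by 2 (h0/h1), the partial sums
 * are combined, and no lane can carry into its neighbour.  Per byte this
 * computes (a+b+c+d+2)>>2 for the rounded variants (rounding constant
 * 0x02020202) and (a+b+c+d+1)>>2 for the no_rnd ones (0x01010101).  Worked
 * example for one byte lane: a=1, b=2, c=3, d=4 gives (1+2+3+4+2)>>2 = 3
 * rounded and (1+2+3+4+1)>>2 = 2 unrounded.  The two-input _l2 helpers rely
 * on the rnd_avg_pixel4()/no_rnd_avg_pixel4() macros from the shared headers
 * for the same per-pixel average.
 */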
#define put_no_rnd_pixels8_c  put_pixels8_c
#define put_no_rnd_pixels16_c put_pixels16_c

static void FUNCC(put_no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    FUNC(put_no_rnd_pixels8_l2)(dst, a, b, stride, stride, stride, h);
}
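/*
 * For the "put" operation a plain copy involves no averaging, so the rounded
 * and no-rounding variants are identical; the two #defines above simply alias
 * put_no_rnd_pixels{8,16}_c to the ordinary put_pixels{8,16}_c functions, and
 * the wrapper re-exports the three-stride l2 helper with a single common
 * stride.
 */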
#define H264_CHROMA_MC(OPNAME, OP)\
static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    stride /= sizeof(pixel);\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    stride /= sizeof(pixel);\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    stride /= sizeof(pixel);\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}

#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_ , op_put)
H264_CHROMA_MC(avg_ , op_avg)
#undef op_avg
#undef op_put
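/*
 * The chroma MC functions perform plain bilinear interpolation at 1/8-pel
 * resolution: the four weights A=(8-x)(8-y), B=x(8-y), C=(8-x)y and D=xy
 * always sum to 64, which is why op_put rounds with ((b) + 32) >> 6.  For
 * example, x=2, y=3 gives A=30, B=10, C=18, D=6 (30+10+18+6 = 64).  When
 * D == 0 the sample lies on a row or column of the source grid and the code
 * falls back to a cheaper two-tap filter with E = B + C.
 */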
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
    const int h=2;\
    INIT_CLIP\
    int i;\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    dstStride /= sizeof(pixel);\
    srcStride /= sizeof(pixel);\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
    const int w=2;\
    INIT_CLIP\
    int i;\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    dstStride /= sizeof(pixel);\
    srcStride /= sizeof(pixel);\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
    INIT_CLIP\
    int i;\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    dstStride /= sizeof(pixel);\
    srcStride /= sizeof(pixel);\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride] - pad;\
        const int tmpA= tmp[-1*tmpStride] - pad;\
        const int tmp0= tmp[0 *tmpStride] - pad;\
        const int tmp1= tmp[1 *tmpStride] - pad;\
        const int tmp2= tmp[2 *tmpStride] - pad;\
        const int tmp3= tmp[3 *tmpStride] - pad;\
        const int tmp4= tmp[4 *tmpStride] - pad;\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
    const int h=4;\
    INIT_CLIP\
    int i;\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    dstStride /= sizeof(pixel);\
    srcStride /= sizeof(pixel);\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
    const int w=4;\
    INIT_CLIP\
    int i;\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    dstStride /= sizeof(pixel);\
    srcStride /= sizeof(pixel);\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
    INIT_CLIP\
    int i;\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    dstStride /= sizeof(pixel);\
    srcStride /= sizeof(pixel);\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]) + pad;\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]) + pad;\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride] - pad;\
        const int tmpA= tmp[-1*tmpStride] - pad;\
        const int tmp0= tmp[0 *tmpStride] - pad;\
        const int tmp1= tmp[1 *tmpStride] - pad;\
        const int tmp2= tmp[2 *tmpStride] - pad;\
        const int tmp3= tmp[3 *tmpStride] - pad;\
        const int tmp4= tmp[4 *tmpStride] - pad;\
        const int tmp5= tmp[5 *tmpStride] - pad;\
        const int tmp6= tmp[6 *tmpStride] - pad;\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
    const int h=8;\
    INIT_CLIP\
    int i;\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    dstStride /= sizeof(pixel);\
    srcStride /= sizeof(pixel);\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
    const int w=8;\
    INIT_CLIP\
    int i;\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    dstStride /= sizeof(pixel);\
    srcStride /= sizeof(pixel);\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
    INIT_CLIP\
    int i;\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    dstStride /= sizeof(pixel);\
    srcStride /= sizeof(pixel);\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]) + pad;\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]) + pad;\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]) + pad;\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]) + pad;\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]) + pad;\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]) + pad;\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]) + pad;\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]) + pad;\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride] - pad;\
        const int tmpA= tmp[-1*tmpStride] - pad;\
        const int tmp0= tmp[0 *tmpStride] - pad;\
        const int tmp1= tmp[1 *tmpStride] - pad;\
        const int tmp2= tmp[2 *tmpStride] - pad;\
        const int tmp3= tmp[3 *tmpStride] - pad;\
        const int tmp4= tmp[4 *tmpStride] - pad;\
        const int tmp5= tmp[5 *tmpStride] - pad;\
        const int tmp6= tmp[6 *tmpStride] - pad;\
        const int tmp7= tmp[7 *tmpStride] - pad;\
        const int tmp8= tmp[8 *tmpStride] - pad;\
        const int tmp9= tmp[9 *tmpStride] - pad;\
        const int tmp10=tmp[10*tmpStride] - pad;\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
}\
\
static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
}\
\
static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
}
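/*
 * All qpel lowpass filters above use the 6-tap H.264 half-pel kernel
 * (1, -5, 20, 20, -5, 1).  A single pass scales the result by 32, hence
 * op_put rounds with ((b) + 16) >> 5; the hv path filters horizontally into
 * a 16-bit tmp[] buffer and then vertically, scaling by 32*32 = 1024, hence
 * the OP2 variants round with ((b) + 512) >> 10.  The 'pad' offset shifts
 * the intermediate values for bit depths above 9 so that they still fit in
 * int16_t; the bias is subtracted again before the second pass.
 */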
#define H264_MC(OPNAME, SIZE) \
static av_unused void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, uint8_t *src, int stride){\
    FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, uint8_t *src, int stride){\
    FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
    FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
    FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
    FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\

#define op_avg(a, b) a = (((a)+CLIP(((b) + 16)>>5)+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b) a = CLIP(((b) + 16)>>5)
#define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
#define op2_put(a, b) a = CLIP(((b) + 512)>>10)

H264_LOWPASS(put_ , op_put, op2_put)
H264_LOWPASS(avg_ , op_avg, op2_avg)

H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
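/*
 * Naming of the _mcXY functions follows the H.264 quarter-pel convention:
 * X is the horizontal and Y the vertical quarter-sample offset, so _mc00 is
 * the full-pel copy, _mc20/_mc02 are the horizontal/vertical half-pel
 * positions produced directly by the 6-tap filter, and the remaining cases
 * are built by averaging two half-pel (or full-pel) intermediates with the
 * pixelsN_l2 helpers, as can be seen in the H264_MC macro above.
 */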
#if BIT_DEPTH == 8
#   define put_h264_qpel8_mc00_8_c  ff_put_pixels8x8_8_c
#   define avg_h264_qpel8_mc00_8_c  ff_avg_pixels8x8_8_c
#   define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
#   define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
#elif BIT_DEPTH == 9
#   define put_h264_qpel8_mc00_9_c  ff_put_pixels8x8_9_c
#   define avg_h264_qpel8_mc00_9_c  ff_avg_pixels8x8_9_c
#   define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
#   define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
#elif BIT_DEPTH == 10
#   define put_h264_qpel8_mc00_10_c  ff_put_pixels8x8_10_c
#   define avg_h264_qpel8_mc00_10_c  ff_avg_pixels8x8_10_c
#   define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
#   define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
#endif
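/*
 * These aliases route the full-pel (mc00) qpel cases to the plain 8x8 and
 * 16x16 put/avg wrappers defined below, selecting the name that matches the
 * bit depth this template is being compiled for.
 */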
void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(put_pixels8)(dst, src, stride, 8);
}
void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(avg_pixels8)(dst, src, stride, 8);
}
void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(put_pixels16)(dst, src, stride, 16);
}
void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(avg_pixels16)(dst, src, stride, 16);
}