  1. /*
  2. * DSP utils
  3. * Copyright (c) 2000, 2001 Fabrice Bellard
  4. * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  5. *
  6. * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
  7. *
  8. * This file is part of FFmpeg.
  9. *
  10. * FFmpeg is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU Lesser General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2.1 of the License, or (at your option) any later version.
  14. *
  15. * FFmpeg is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * Lesser General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Lesser General Public
  21. * License along with FFmpeg; if not, write to the Free Software
  22. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23. */
  24. /**
  25. * @file
  26. * DSP utils
  27. */
  28. #include "bit_depth_template.c"
  29. static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  30. {
  31. int i;
  32. for(i=0; i<h; i++)
  33. {
  34. AV_WN2P(dst , AV_RN2P(src ));
  35. dst+=dstStride;
  36. src+=srcStride;
  37. }
  38. }
  39. static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  40. {
  41. int i;
  42. for(i=0; i<h; i++)
  43. {
  44. AV_WN4P(dst , AV_RN4P(src ));
  45. dst+=dstStride;
  46. src+=srcStride;
  47. }
  48. }
  49. static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  50. {
  51. int i;
  52. for(i=0; i<h; i++)
  53. {
  54. AV_WN4P(dst , AV_RN4P(src ));
  55. AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel)));
  56. dst+=dstStride;
  57. src+=srcStride;
  58. }
  59. }
  60. static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  61. {
  62. int i;
  63. for(i=0; i<h; i++)
  64. {
  65. AV_WN4P(dst , AV_RN4P(src ));
  66. AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel)));
  67. AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
  68. AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
  69. dst+=dstStride;
  70. src+=srcStride;
  71. }
  72. }
  73. /* draw the edges of width 'w' of an image of size width, height */
  74. //FIXME check that this is ok for mpeg4 interlaced
  75. static void FUNCC(draw_edges)(uint8_t *p_buf, int p_wrap, int width, int height, int w, int h, int sides)
  76. {
  77. pixel *buf = (pixel*)p_buf;
  78. int wrap = p_wrap / sizeof(pixel);
  79. pixel *ptr, *last_line;
  80. int i;
  81. /* left and right */
  82. ptr = buf;
  83. for(i=0;i<height;i++) {
  84. #if BIT_DEPTH > 8
  85. int j;
  86. for (j = 0; j < w; j++) {
  87. ptr[j-w] = ptr[0];
  88. ptr[j+width] = ptr[width-1];
  89. }
  90. #else
  91. memset(ptr - w, ptr[0], w);
  92. memset(ptr + width, ptr[width-1], w);
  93. #endif
  94. ptr += wrap;
  95. }
  96. /* top and bottom + corners */
  97. buf -= w;
  98. last_line = buf + (height - 1) * wrap;
  99. if (sides & EDGE_TOP)
  100. for(i = 0; i < h; i++)
  101. memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
  102. if (sides & EDGE_BOTTOM)
  103. for (i = 0; i < h; i++)
  104. memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
  105. }
/*
 * Instantiate get_pixels / add_pixels / clear_block(s) for a given DCT
 * coefficient type 'dctcoef' and function-name suffix.
 */
#define DCTELEM_FUNCS(dctcoef, suffix) \
/* Copy an 8x8 block of pixels into a coefficient array (row-major). */ \
static void FUNCC(get_pixels ## suffix)(int16_t *av_restrict _block, \
                                        const uint8_t *_pixels, \
                                        int line_size) \
{ \
    const pixel *pixels = (const pixel *) _pixels; \
    dctcoef *av_restrict block = (dctcoef *) _block; \
    int i; \
 \
    /* read the pixels */ \
    for(i=0;i<8;i++) { \
        block[0] = pixels[0]; \
        block[1] = pixels[1]; \
        block[2] = pixels[2]; \
        block[3] = pixels[3]; \
        block[4] = pixels[4]; \
        block[5] = pixels[5]; \
        block[6] = pixels[6]; \
        block[7] = pixels[7]; \
        pixels += line_size / sizeof(pixel); /* line_size is in bytes */ \
        block += 8; \
    } \
} \
 \
/* Add an 8x8 coefficient block onto the pixels (no clipping here). */ \
static void FUNCC(add_pixels8 ## suffix)(uint8_t *av_restrict _pixels, \
                                         int16_t *_block, \
                                         int line_size) \
{ \
    int i; \
    pixel *av_restrict pixels = (pixel *av_restrict)_pixels; \
    dctcoef *block = (dctcoef*)_block; \
    line_size /= sizeof(pixel); \
 \
    for(i=0;i<8;i++) { \
        pixels[0] += block[0]; \
        pixels[1] += block[1]; \
        pixels[2] += block[2]; \
        pixels[3] += block[3]; \
        pixels[4] += block[4]; \
        pixels[5] += block[5]; \
        pixels[6] += block[6]; \
        pixels[7] += block[7]; \
        pixels += line_size; \
        block += 8; \
    } \
} \
 \
/* Add a 4x4 coefficient block onto the pixels. */ \
static void FUNCC(add_pixels4 ## suffix)(uint8_t *av_restrict _pixels, \
                                         int16_t *_block, \
                                         int line_size) \
{ \
    int i; \
    pixel *av_restrict pixels = (pixel *av_restrict)_pixels; \
    dctcoef *block = (dctcoef*)_block; \
    line_size /= sizeof(pixel); \
 \
    for(i=0;i<4;i++) { \
        pixels[0] += block[0]; \
        pixels[1] += block[1]; \
        pixels[2] += block[2]; \
        pixels[3] += block[3]; \
        pixels += line_size; \
        block += 4; \
    } \
} \
 \
/* Zero one 64-coefficient block. */ \
static void FUNCC(clear_block ## suffix)(int16_t *block) \
{ \
    memset(block, 0, sizeof(dctcoef)*64); \
} \
 \
/** \
 * memset(blocks, 0, sizeof(int16_t)*6*64) \
 */ \
static void FUNCC(clear_blocks ## suffix)(int16_t *blocks) \
{ \
    memset(blocks, 0, sizeof(dctcoef)*6*64); \
}

/* 16-bit coefficient versions are always built. */
DCTELEM_FUNCS(int16_t, _16)
#if BIT_DEPTH > 8
/* Deeper bit depths additionally get wider-coefficient versions. */
DCTELEM_FUNCS(dctcoef, _32)
#endif
/*
 * PIXOP3: core pixel copy/average primitives, parameterized on the store
 * operation OP (assignment for "put", rounded average for "avg").
 * Strides are in bytes; widths in pixels.
 */
#define PIXOP3(OPNAME, OP) \
static void FUNCC(OPNAME ## _pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel4*)(block)), AV_RN4P(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void FUNCC(OPNAME ## _pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel4*)(block)), AV_RN4P(pixels));\
        OP(*((pixel4*)(block+4*sizeof(pixel))), AV_RN4P(pixels+4*sizeof(pixel)));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
/* A plain copy has no rounding, so the no_rnd variant is the same code. */\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNCC(OPNAME ## _pixels8)(block, pixels, line_size, h);\
}\
\
/* dst = OP(rounded average of src1 and src2), 8 pixels wide. */\
static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1]);\
        b= AV_RN4P(&src2[i*src_stride2]);\
        OP(*((pixel4*)&dst[i*dst_stride]), rnd_avg_pixel4(a, b));\
        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), rnd_avg_pixel4(a, b));\
    }\
}\
\
/* Same as above, 4 pixels wide. */\
static inline void FUNC(OPNAME ## _pixels4_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1]);\
        b= AV_RN4P(&src2[i*src_stride2]);\
        OP(*((pixel4*)&dst[i*dst_stride]), rnd_avg_pixel4(a, b));\
    }\
}\
\
/* Same, 2 pixels wide (2-pixel loads/stores, 4-pixel averaging helper). */\
static inline void FUNC(OPNAME ## _pixels2_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN2P(&src1[i*src_stride1]);\
        b= AV_RN2P(&src2[i*src_stride2]);\
        OP(*((pixel2*)&dst[i*dst_stride]), rnd_avg_pixel4(a, b));\
    }\
}\
\
/* 16-wide variant built from two 8-wide halves. */\
static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    FUNC(OPNAME ## _pixels8_l2)(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);\
    FUNC(OPNAME ## _pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
}\
\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))
/*
 * PIXOP4: PIXOP3 plus the 2-pixel-wide copy.  Used directly for "put"
 * at bit depths > 8, where the 8-bit-only SWAR code of PIXOP2 is skipped.
 */
#define PIXOP4(OPNAME, OP) \
static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel2*)(block)), AV_RN2P(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
PIXOP3(OPNAME, OP)
/*
 * PIXOP2: the full operation set, adding the "no rounding" (round-down)
 * variants and half-pel interpolators.  The SWAR code below packs four
 * 8-bit pixels per uint32_t, so these parts are 8-bit only (see the
 * FIXME HIGH BIT DEPTH notes).
 */
#define PIXOP2(OPNAME, OP) \
PIXOP4(OPNAME, OP)\
\
/* dst = OP(average of src1/src2) with downward rounding (no +1 bias). */\
static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                      int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1]);\
        b= AV_RN4P(&src2[i*src_stride2]);\
        OP(*((pixel4*)&dst[i*dst_stride]), no_rnd_avg_pixel4(a, b));\
        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
    }\
}\
\
static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                       int src_stride1, int src_stride2, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
}\
\
/* Half-pel x: average each pixel with its right neighbour. */\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
/* Half-pel y: average each pixel with the one below it. */\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
/* 4-source average with +2 rounding; SWAR on two uint32_t halves. */\
static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                                               int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    /* FIXME HIGH BIT DEPTH */\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        /* l* hold the low 2 bits per byte (plus rounding), h* the high bits >>2 */\
        l0= (a&0x03030303UL) + (b&0x03030303UL) + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL) + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL) + (b&0x03030303UL) + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL) + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
/* 4-source average with +1 (round-down) bias. */\
static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                                                      int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    /* FIXME HIGH BIT DEPTH*/\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL) + (b&0x03030303UL) + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL) + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL) + (b&0x03030303UL) + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL) + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2) + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                                                int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    FUNC(OPNAME ## _pixels8_l4)(dst, src1, src2, src3, src4, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                                                       int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst, src1, src2, src3, src4, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
/* 2-wide half-pel xy: scalar 4-tap average, carrying the vertical */\
/* partial sums a0/b0 across the two rows handled per iteration. */\
static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *p_block, const uint8_t *p_pixels, int line_size, int h)\
{\
    int i, a0, b0, a1, b1;\
    pixel *block = (pixel*)p_block;\
    const pixel *pixels = (const pixel*)p_pixels;\
    line_size >>= sizeof(pixel)-1; /* byte stride -> pixel stride (sizeof(pixel) is 1 or 2) */\
    a0= pixels[0];\
    b0= pixels[1] + 2;\
    a0 += b0;\
    b0 += pixels[2];\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        a1= pixels[0];\
        b1= pixels[1];\
        a1 += b1;\
        b1 += pixels[2];\
\
        block[0]= (a1+a0)>>2; /* FIXME non put */\
        block[1]= (b1+b0)>>2;\
\
        pixels+=line_size;\
        block +=line_size;\
\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        block[0]= (a1+a0)>>2;\
        block[1]= (b1+b0)>>2;\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
/* 4-wide half-pel xy: SWAR over one uint32_t, reusing l0/h0 vertically. */\
static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int i;\
    const uint32_t a= AV_RN32(pixels);\
    const uint32_t b= AV_RN32(pixels+1);\
    uint32_t l0= (a&0x03030303UL) + (b&0x03030303UL) + 0x02020202UL;\
    uint32_t h0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2);\
    uint32_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint32_t a= AV_RN32(pixels);\
        uint32_t b= AV_RN32(pixels+1);\
        l1= (a&0x03030303UL) + (b&0x03030303UL);\
        h1= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN32(pixels);\
        b= AV_RN32(pixels+1);\
        l0= (a&0x03030303UL) + (b&0x03030303UL) + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
/* 8-wide half-pel xy: the 4-wide SWAR kernel run over both halves (j loop). */\
static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels);\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL) + (b&0x03030303UL) + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels);\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL) + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels);\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL) + (b&0x03030303UL) + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        /* rewind to the top of the second 4-pixel half (pixels read one extra row) */\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
/* Same as pixels8_xy2 but with +1 (round-down) bias. */\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels);\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL) + (b&0x03030303UL) + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels);\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL) + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels);\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL) + (b&0x03030303UL) + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2) + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
/* 16-wide entry points built from the 8-wide ones. */\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\

/* Store-operation plugs: "avg" blends with the existing destination,
 * "put" overwrites it. */
#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
#define op_put(a, b) a = b
#if BIT_DEPTH == 8
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#else
/* Bit depths > 8 get only the generic variants: the no_rnd SWAR code in
 * PIXOP2 packs 8-bit pixels and is not usable here. */
PIXOP3(avg, op_avg)
PIXOP4(put, op_put)
#endif
#undef op_avg
#undef op_put
/*
 * H.264 chroma motion compensation: bilinear interpolation with eighth-pel
 * fractional offsets x, y in [0, 8).  A..D are the four bilinear weights
 * (they sum to 64).  When D == 0 (x == 0 or y == 0) the filter collapses
 * to a two-tap blend along one axis, chosen via 'step'.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)p_dst;\
    pixel *src = (pixel*)p_src;\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    stride >>= sizeof(pixel)-1; /* byte stride -> pixel stride (sizeof(pixel) is 1 or 2) */\
\
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C; /* exactly one of B, C is nonzero here */\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)p_dst;\
    pixel *src = (pixel*)p_src;\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    stride >>= sizeof(pixel)-1;\
\
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)p_dst;\
    pixel *src = (pixel*)p_src;\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    stride >>= sizeof(pixel)-1;\
\
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}

/* The weighted sum is scaled back by (+32)>>6; "avg" then rounds with the
 * existing destination pixel. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)
H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
  692. #define H264_LOWPASS(OPNAME, OP, OP2) \
  693. static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
  694. const int h=2;\
  695. INIT_CLIP\
  696. int i;\
  697. pixel *dst = (pixel*)p_dst;\
  698. pixel *src = (pixel*)p_src;\
  699. dstStride >>= sizeof(pixel)-1;\
  700. srcStride >>= sizeof(pixel)-1;\
  701. for(i=0; i<h; i++)\
  702. {\
  703. OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
  704. OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
  705. dst+=dstStride;\
  706. src+=srcStride;\
  707. }\
  708. }\
  709. \
  710. static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
  711. const int w=2;\
  712. INIT_CLIP\
  713. int i;\
  714. pixel *dst = (pixel*)p_dst;\
  715. pixel *src = (pixel*)p_src;\
  716. dstStride >>= sizeof(pixel)-1;\
  717. srcStride >>= sizeof(pixel)-1;\
  718. for(i=0; i<w; i++)\
  719. {\
  720. const int srcB= src[-2*srcStride];\
  721. const int srcA= src[-1*srcStride];\
  722. const int src0= src[0 *srcStride];\
  723. const int src1= src[1 *srcStride];\
  724. const int src2= src[2 *srcStride];\
  725. const int src3= src[3 *srcStride];\
  726. const int src4= src[4 *srcStride];\
  727. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  728. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  729. dst++;\
  730. src++;\
  731. }\
  732. }\
  733. \
  734. static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *p_dst, pixeltmp *tmp, uint8_t *p_src, int dstStride, int tmpStride, int srcStride){\
  735. const int h=2;\
  736. const int w=2;\
  737. const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
  738. INIT_CLIP\
  739. int i;\
  740. pixel *dst = (pixel*)p_dst;\
  741. pixel *src = (pixel*)p_src;\
  742. dstStride >>= sizeof(pixel)-1;\
  743. srcStride >>= sizeof(pixel)-1;\
  744. src -= 2*srcStride;\
  745. for(i=0; i<h+5; i++)\
  746. {\
  747. tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
  748. tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
  749. tmp+=tmpStride;\
  750. src+=srcStride;\
  751. }\
  752. tmp -= tmpStride*(h+5-2);\
  753. for(i=0; i<w; i++)\
  754. {\
  755. const int tmpB= tmp[-2*tmpStride] - pad;\
  756. const int tmpA= tmp[-1*tmpStride] - pad;\
  757. const int tmp0= tmp[0 *tmpStride] - pad;\
  758. const int tmp1= tmp[1 *tmpStride] - pad;\
  759. const int tmp2= tmp[2 *tmpStride] - pad;\
  760. const int tmp3= tmp[3 *tmpStride] - pad;\
  761. const int tmp4= tmp[4 *tmpStride] - pad;\
  762. OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
  763. OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
  764. dst++;\
  765. tmp++;\
  766. }\
  767. }\
  768. static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
  769. const int h=4;\
  770. INIT_CLIP\
  771. int i;\
  772. pixel *dst = (pixel*)p_dst;\
  773. pixel *src = (pixel*)p_src;\
  774. dstStride >>= sizeof(pixel)-1;\
  775. srcStride >>= sizeof(pixel)-1;\
  776. for(i=0; i<h; i++)\
  777. {\
  778. OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
  779. OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
  780. OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
  781. OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
  782. dst+=dstStride;\
  783. src+=srcStride;\
  784. }\
  785. }\
  786. \
  787. static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
  788. const int w=4;\
  789. INIT_CLIP\
  790. int i;\
  791. pixel *dst = (pixel*)p_dst;\
  792. pixel *src = (pixel*)p_src;\
  793. dstStride >>= sizeof(pixel)-1;\
  794. srcStride >>= sizeof(pixel)-1;\
  795. for(i=0; i<w; i++)\
  796. {\
  797. const int srcB= src[-2*srcStride];\
  798. const int srcA= src[-1*srcStride];\
  799. const int src0= src[0 *srcStride];\
  800. const int src1= src[1 *srcStride];\
  801. const int src2= src[2 *srcStride];\
  802. const int src3= src[3 *srcStride];\
  803. const int src4= src[4 *srcStride];\
  804. const int src5= src[5 *srcStride];\
  805. const int src6= src[6 *srcStride];\
  806. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  807. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  808. OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
  809. OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
  810. dst++;\
  811. src++;\
  812. }\
  813. }\
  814. \
static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *p_dst, pixeltmp *tmp, uint8_t *p_src, int dstStride, int tmpStride, int srcStride){\
    /* Centre (half-pel in both axes) sample: a horizontal 6-tap pass    \
       into tmp[], then a vertical 6-tap pass over tmp.  OP2 normalises  \
       by the combined two-pass gain (see op2_put/op2_avg).  The first   \
       pass produces h+5 rows so the second pass has its 2-above /       \
       3-below context. */\
    const int h=4;\
    const int w=4;\
    /* Bias added in pass 1 and subtracted again in pass 2 for 10-bit    \
       input.  NOTE(review): presumably keeps intermediates within the   \
       range of a 16-bit pixeltmp -- confirm against pixeltmp's          \
       definition in the template header. */\
    const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
    INIT_CLIP\
    int i;\
    pixel *dst = (pixel*)p_dst;\
    pixel *src = (pixel*)p_src;\
    dstStride >>= sizeof(pixel)-1;  /* byte strides -> pixel strides */\
    srcStride >>= sizeof(pixel)-1;\
    src -= 2*srcStride;  /* start two rows early for vertical context */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]) + pad;\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]) + pad;\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);  /* rewind to row 0 of the output window */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride] - pad;\
        const int tmpA= tmp[-1*tmpStride] - pad;\
        const int tmp0= tmp[0 *tmpStride] - pad;\
        const int tmp1= tmp[1 *tmpStride] - pad;\
        const int tmp2= tmp[2 *tmpStride] - pad;\
        const int tmp3= tmp[3 *tmpStride] - pad;\
        const int tmp4= tmp[4 *tmpStride] - pad;\
        const int tmp5= tmp[5 *tmpStride] - pad;\
        const int tmp6= tmp[6 *tmpStride] - pad;\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
  855. \
static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
    /* Horizontal 6-tap H.264 lowpass (taps 1,-5,20,20,-5,1), 8 output  \
       pixels per row for h=8 rows; reads src[-2]..src[10] per row.     \
       OP rounds/clips (and averages with dst for the avg_ variant). */\
    const int h=8;\
    INIT_CLIP\
    int i;\
    pixel *dst = (pixel*)p_dst;\
    pixel *src = (pixel*)p_src;\
    dstStride >>= sizeof(pixel)-1;  /* byte strides -> pixel strides */\
    srcStride >>= sizeof(pixel)-1;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
  878. \
static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
    /* Vertical 6-tap lowpass over an 8-pixel-wide block; one column per \
       outer iteration, reading 2 rows above and 3 below the 8 output    \
       rows.  OP does the rounding/clip (op_put) or round/clip plus      \
       averaging (op_avg). */\
    const int w=8;\
    INIT_CLIP\
    int i;\
    pixel *dst = (pixel*)p_dst;\
    pixel *src = (pixel*)p_src;\
    dstStride >>= sizeof(pixel)-1;  /* byte strides -> pixel strides */\
    srcStride >>= sizeof(pixel)-1;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
  914. \
static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *p_dst, pixeltmp *tmp, uint8_t *p_src, int dstStride, int tmpStride, int srcStride){\
    /* 8x8 centre sample: horizontal 6-tap pass into tmp[] (h+5 rows for \
       vertical context), then vertical 6-tap pass over tmp with OP2     \
       normalising the combined two-pass gain (op2_put/op2_avg). */\
    const int h=8;\
    const int w=8;\
    /* Bias added in pass 1, subtracted in pass 2, for 10-bit input.     \
       NOTE(review): presumably keeps intermediates within a 16-bit      \
       pixeltmp -- confirm against pixeltmp's definition. */\
    const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
    INIT_CLIP\
    int i;\
    pixel *dst = (pixel*)p_dst;\
    pixel *src = (pixel*)p_src;\
    dstStride >>= sizeof(pixel)-1;  /* byte strides -> pixel strides */\
    srcStride >>= sizeof(pixel)-1;\
    src -= 2*srcStride;  /* start two rows early for vertical context */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]) + pad;\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]) + pad;\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]) + pad;\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]) + pad;\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]) + pad;\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]) + pad;\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]) + pad;\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]) + pad;\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);  /* rewind to row 0 of the output window */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride] - pad;\
        const int tmpA= tmp[-1*tmpStride] - pad;\
        const int tmp0= tmp[0 *tmpStride] - pad;\
        const int tmp1= tmp[1 *tmpStride] - pad;\
        const int tmp2= tmp[2 *tmpStride] - pad;\
        const int tmp3= tmp[3 *tmpStride] - pad;\
        const int tmp4= tmp[4 *tmpStride] - pad;\
        const int tmp5= tmp[5 *tmpStride] - pad;\
        const int tmp6= tmp[6 *tmpStride] - pad;\
        const int tmp7= tmp[7 *tmpStride] - pad;\
        const int tmp8= tmp[8 *tmpStride] - pad;\
        const int tmp9= tmp[9 *tmpStride] - pad;\
        const int tmp10=tmp[10*tmpStride] - pad;\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
  967. \
static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    /* 16x16 = four 8x8 quadrants.  Pointers and strides here are still  \
       in bytes, hence the 8*sizeof(pixel) horizontal offsets. */\
    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst                , src                , dstStride, srcStride);\
    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
    src += 8*srcStride;  /* drop to the bottom half */\
    dst += 8*dstStride;\
    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst                , src                , dstStride, srcStride);\
    FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
}\
  976. \
static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    /* 16x16 = four 8x8 quadrants; byte strides, so horizontal offsets   \
       carry the sizeof(pixel) factor. */\
    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst                , src                , dstStride, srcStride);\
    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
    src += 8*srcStride;  /* drop to the bottom half */\
    dst += 8*dstStride;\
    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst                , src                , dstStride, srcStride);\
    FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
}\
  985. \
static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, pixeltmp *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    /* Four 8x8 quadrants.  dst/src offsets are in bytes; tmp is indexed \
       in pixeltmp elements, so its horizontal offset is a plain 8. */\
    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst                , tmp  , src                , dstStride, tmpStride, srcStride);\
    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
    src += 8*srcStride;  /* drop to the bottom half */\
    dst += 8*dstStride;\
    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst                , tmp  , src                , dstStride, tmpStride, srcStride);\
    FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
}\
/* H264_MC(OPNAME, SIZE) expands to the 16 quarter-pel motion-compensation
 * entry points _mc00.._mc33 for one block size; the two digits are the
 * x/y quarter-pel offsets.  Half-pel planes come from the 6-tap *_lowpass
 * helpers above; quarter-pel positions are the rounded average
 * (pixels_l2) of the two nearest half-pel/integer planes.  All stride
 * arguments passed along are byte strides, hence the sizeof(pixel)
 * factors on offsets and on the scratch-buffer strides. */
#define H264_MC(OPNAME, SIZE) \
/* (0,0): integer position -- plain copy (put_) or average (avg_). */\
static av_unused void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, uint8_t *src, int stride){\
    FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\
}\
\
/* (1,0): mean of src and the horizontal half-pel plane. */\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\
}\
\
/* (2,0): horizontal half-pel plane written directly. */\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, uint8_t *src, int stride){\
    FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\
}\
\
/* (3,0): mean of the half-pel plane and the pixel one to the right. */\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\
}\
\
/* Vertical cases first copy SIZE+5 rows (2 above, 3 below) into 'full' \
   so the v filter has context; full_mid points at row 2 (the origin). */\
/* (0,1): mean of the source column and the vertical half-pel plane. */\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
/* (0,2): vertical half-pel plane written directly. */\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\
}\
\
/* (0,3): mean of the vertical half-pel plane and the pixel one below. */\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    uint8_t half[SIZE*SIZE*sizeof(pixel)];\
    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
/* Diagonal cases (1,1)/(3,1)/(1,3)/(3,3): mean of one horizontal and    \
   one vertical half-pel plane, each taken at the nearest row/column. */\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
/* (3,1): vertical plane comes from one pixel to the right. */\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
    FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
/* (1,3): horizontal plane comes from one row below. */\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
/* (3,3): horizontal plane from one row below, vertical from one right. */\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
    FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
/* (2,2): centre position straight from the separable hv lowpass. */\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, uint8_t *src, int stride){\
    pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
    FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
}\
\
/* (2,1)/(2,3): mean of the hv centre plane and a horizontal half-pel    \
   plane (from the current row, resp. the row below). */\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *src, int stride){\
    pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *src, int stride){\
    pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
/* (1,2)/(3,2): mean of the hv centre plane and a vertical half-pel      \
   plane (from the current column, resp. one column right). */\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
    pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
    uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
    uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
    FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
    FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
    FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
    FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
/* Rounding ops plugged into the templates above:
 *  - op_put / op_avg   finish one 6-tap pass  (gain 32   -> (b+16)>>5)
 *  - op2_put / op2_avg finish two 6-tap passes (gain 1024 -> (b+512)>>10)
 * The avg_ forms additionally round-average with the existing dst pixel. */
#define op_avg(a, b) a = (((a)+CLIP(((b) + 16)>>5)+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b) a = CLIP(((b) + 16)>>5)
#define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
#define op2_put(a, b) a = CLIP(((b) + 512)>>10)
H264_LOWPASS(put_ , op_put, op2_put)
H264_LOWPASS(avg_ , op_avg, op2_avg)
/* Instantiate the quarter-pel MC entry points.  Note: no avg_ variant
 * is generated for the 2x2 size. */
H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)
#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
/* Full-pel MC (mc00) for 8x8 and 16x16 is a plain copy/average, so those
 * per-bit-depth entry points are aliased to the shared pixel-copy
 * wrappers defined below. */
#if BIT_DEPTH == 8
# define put_h264_qpel8_mc00_8_c ff_put_pixels8x8_8_c
# define avg_h264_qpel8_mc00_8_c ff_avg_pixels8x8_8_c
# define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
# define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
#elif BIT_DEPTH == 9
# define put_h264_qpel8_mc00_9_c ff_put_pixels8x8_9_c
# define avg_h264_qpel8_mc00_9_c ff_avg_pixels8x8_9_c
# define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
# define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
#elif BIT_DEPTH == 10
# define put_h264_qpel8_mc00_10_c ff_put_pixels8x8_10_c
# define avg_h264_qpel8_mc00_10_c ff_avg_pixels8x8_10_c
# define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
# define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
#elif BIT_DEPTH == 12
# define put_h264_qpel8_mc00_12_c ff_put_pixels8x8_12_c
# define avg_h264_qpel8_mc00_12_c ff_avg_pixels8x8_12_c
# define put_h264_qpel16_mc00_12_c ff_put_pixels16x16_12_c
# define avg_h264_qpel16_mc00_12_c ff_avg_pixels16x16_12_c
#elif BIT_DEPTH == 14
# define put_h264_qpel8_mc00_14_c ff_put_pixels8x8_14_c
# define avg_h264_qpel8_mc00_14_c ff_avg_pixels8x8_14_c
# define put_h264_qpel16_mc00_14_c ff_put_pixels16x16_14_c
# define avg_h264_qpel16_mc00_14_c ff_avg_pixels16x16_14_c
#endif
/* Fixed-size wrapper: put_pixels8 with the height pinned to 8, exposing
 * a plain (dst, src, stride) interface. */
void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(put_pixels8)(dst, src, stride, 8);
}
/* Fixed-size wrapper: avg_pixels8 with the height pinned to 8. */
void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(avg_pixels8)(dst, src, stride, 8);
}
/* Fixed-size wrapper: put_pixels16 with the height pinned to 16. */
void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(put_pixels16)(dst, src, stride, 16);
}
/* Fixed-size wrapper: avg_pixels16 with the height pinned to 16. */
void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(avg_pixels16)(dst, src, stride, 16);
}