You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1293 lines
56KB

  1. /*
  2. * DSP utils
  3. * Copyright (c) 2000, 2001 Fabrice Bellard
  4. * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  5. *
  6. * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
  7. *
  8. * This file is part of FFmpeg.
  9. *
  10. * FFmpeg is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU Lesser General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2.1 of the License, or (at your option) any later version.
  14. *
  15. * FFmpeg is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * Lesser General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Lesser General Public
  21. * License along with FFmpeg; if not, write to the Free Software
  22. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23. */
  24. /**
  25. * @file
  26. * DSP utils
  27. */
  28. #include "bit_depth_template.c"
  29. static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  30. {
  31. int i;
  32. for(i=0; i<h; i++)
  33. {
  34. AV_WN2P(dst , AV_RN2P(src ));
  35. dst+=dstStride;
  36. src+=srcStride;
  37. }
  38. }
  39. static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  40. {
  41. int i;
  42. for(i=0; i<h; i++)
  43. {
  44. AV_WN4P(dst , AV_RN4P(src ));
  45. dst+=dstStride;
  46. src+=srcStride;
  47. }
  48. }
  49. static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  50. {
  51. int i;
  52. for(i=0; i<h; i++)
  53. {
  54. AV_WN4P(dst , AV_RN4P(src ));
  55. AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel)));
  56. dst+=dstStride;
  57. src+=srcStride;
  58. }
  59. }
  60. static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  61. {
  62. int i;
  63. for(i=0; i<h; i++)
  64. {
  65. AV_WN4P(dst , AV_RN4P(src ));
  66. AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel)));
  67. AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
  68. AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
  69. dst+=dstStride;
  70. src+=srcStride;
  71. }
  72. }
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
static void FUNCC(draw_edges)(uint8_t *p_buf, int p_wrap, int width, int height, int w, int h, int sides)
{
    pixel *buf = (pixel*)p_buf;
    int wrap = p_wrap / sizeof(pixel); /* NOTE(review): assumes p_wrap >= 0 — int/size_t division goes unsigned */
    pixel *ptr, *last_line;
    int i;

    /* left and right: replicate the first/last sample of every row */
    ptr = buf;
    for(i=0;i<height;i++) {
#if BIT_DEPTH > 8
        int j;
        for (j = 0; j < w; j++) {
            ptr[j-w] = ptr[0];
            ptr[j+width] = ptr[width-1];
        }
#else
        /* 8-bit samples: a per-byte memset replicates one pixel value */
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
#endif
        ptr += wrap;
    }

    /* top and bottom + corners: copy whole rows that were just widened
     * by w pixels on each side, so the corners come along for free */
    buf -= w;
    last_line = buf + (height - 1) * wrap;
    if (sides & EDGE_TOP)
        for(i = 0; i < h; i++)
            memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
    if (sides & EDGE_BOTTOM)
        for (i = 0; i < h; i++)
            memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
}
/**
 * Copy a rectangular area of samples to a temporary buffer and replicate the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                               int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* if the block lies entirely outside the picture, clamp src/src_y
     * (resp. src_x) so that at least one source row/column overlaps and
     * the replication below has valid samples to copy from */
    if(src_y>= h){
        src-= src_y*linesize;
        src+= (h-1)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src-= src_y*linesize;
        src+= (1-block_h)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x)*sizeof(pixel);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x)*sizeof(pixel);
        src_x=1-block_w;
    }

    /* intersection of the requested block with the valid source area */
    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);
    av_assert2(start_y < end_y && block_h);
    av_assert2(start_x < end_x && block_w);

    w = end_x - start_x; /* w is reused: now the number of valid columns */
    src += start_y*linesize + start_x*sizeof(pixel);
    buf += start_x*sizeof(pixel);

    //top: replicate the first valid row upwards (src not advanced yet)
    for(y=0; y<start_y; y++){
        memcpy(buf, src, w*sizeof(pixel));
        buf += linesize;
    }

    // copy existing part
    for(; y<end_y; y++){
        memcpy(buf, src, w*sizeof(pixel));
        src += linesize;
        buf += linesize;
    }

    //bottom: step back to the last valid row and replicate it downwards
    src -= linesize;
    for(; y<block_h; y++){
        memcpy(buf, src, w*sizeof(pixel));
        buf += linesize;
    }

    /* second pass over the destination: replicate the leftmost/rightmost
     * valid sample across the horizontal padding of every row */
    buf -= block_h * linesize + start_x*sizeof(pixel);
    while (block_h--){
        pixel *bufp = (pixel*)buf;
        //left
        for(x=0; x<start_x; x++){
            bufp[x] = bufp[start_x];
        }
        //right
        for(x=end_x; x<block_w; x++){
            bufp[x] = bufp[end_x - 1];
        }
        buf += linesize;
    }
}
/**
 * Instantiate get_pixels / add_pixels8 / add_pixels4 / clear_block /
 * clear_blocks for one DCT coefficient type. 'suffix' distinguishes the
 * 16-bit (_16) and 32-bit (_32) coefficient variants; 'dctcoef' is the
 * element type the DCTELEM block pointer is reinterpreted as.
 */
#define DCTELEM_FUNCS(dctcoef, suffix)                                 \
static void FUNCC(get_pixels ## suffix)(DCTELEM *av_restrict _block,   \
                                        const uint8_t *_pixels,        \
                                        int line_size)                 \
{                                                                      \
    const pixel *pixels = (const pixel *) _pixels;                     \
    dctcoef *av_restrict block = (dctcoef *) _block;                   \
    int i;                                                             \
                                                                       \
    /* read the pixels */                                              \
    for(i=0;i<8;i++) {                                                 \
        block[0] = pixels[0];                                          \
        block[1] = pixels[1];                                          \
        block[2] = pixels[2];                                          \
        block[3] = pixels[3];                                          \
        block[4] = pixels[4];                                          \
        block[5] = pixels[5];                                          \
        block[6] = pixels[6];                                          \
        block[7] = pixels[7];                                          \
        pixels += line_size / sizeof(pixel);                           \
        block += 8;                                                    \
    }                                                                  \
}                                                                      \
                                                                       \
/* add an 8x8 residual block to the pixels in place */                 \
static void FUNCC(add_pixels8 ## suffix)(uint8_t *av_restrict _pixels, \
                                         DCTELEM *_block,              \
                                         int line_size)                \
{                                                                      \
    int i;                                                             \
    pixel *av_restrict pixels = (pixel *av_restrict)_pixels;           \
    dctcoef *block = (dctcoef*)_block;                                 \
    line_size /= sizeof(pixel);                                        \
                                                                       \
    for(i=0;i<8;i++) {                                                 \
        pixels[0] += block[0];                                         \
        pixels[1] += block[1];                                         \
        pixels[2] += block[2];                                         \
        pixels[3] += block[3];                                         \
        pixels[4] += block[4];                                         \
        pixels[5] += block[5];                                         \
        pixels[6] += block[6];                                         \
        pixels[7] += block[7];                                         \
        pixels += line_size;                                           \
        block += 8;                                                    \
    }                                                                  \
}                                                                      \
                                                                       \
/* add a 4x4 residual block to the pixels in place */                  \
static void FUNCC(add_pixels4 ## suffix)(uint8_t *av_restrict _pixels, \
                                         DCTELEM *_block,              \
                                         int line_size)                \
{                                                                      \
    int i;                                                             \
    pixel *av_restrict pixels = (pixel *av_restrict)_pixels;           \
    dctcoef *block = (dctcoef*)_block;                                 \
    line_size /= sizeof(pixel);                                        \
                                                                       \
    for(i=0;i<4;i++) {                                                 \
        pixels[0] += block[0];                                         \
        pixels[1] += block[1];                                         \
        pixels[2] += block[2];                                         \
        pixels[3] += block[3];                                         \
        pixels += line_size;                                           \
        block += 4;                                                    \
    }                                                                  \
}                                                                      \
                                                                       \
static void FUNCC(clear_block ## suffix)(DCTELEM *block)               \
{                                                                      \
    memset(block, 0, sizeof(dctcoef)*64);                              \
}                                                                      \
                                                                       \
/**                                                                    \
 * memset(blocks, 0, sizeof(DCTELEM)*6*64)                             \
 */                                                                    \
static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks)             \
{                                                                      \
    memset(blocks, 0, sizeof(dctcoef)*6*64);                           \
}

DCTELEM_FUNCS(DCTELEM, _16)
#if BIT_DEPTH > 8
DCTELEM_FUNCS(dctcoef, _32)
#endif
/**
 * Instantiate the family of pixel copy/average primitives for one operator.
 * OPNAME is the function-name prefix ("put" or "avg"); OP(dst, val) either
 * stores val or averages it into dst.  The _l2/_l4 variants blend 2 or 4
 * sources, the _x2/_y2/_xy2 variants implement half-pel interpolation, and
 * the no_rnd variants use downward rounding (no_rnd_avg / +1 smaller bias).
 * NOTE(review): the _l4 and _xy2 variants operate on packed 8-bit lanes
 * (0x03030303 masks), hence the FIXME HIGH BIT DEPTH markers.
 */
#define PIXOP2(OPNAME, OP) \
static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel2*)(block )), AV_RN2P(pixels ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void FUNCC(OPNAME ## _pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel4*)(block )), AV_RN4P(pixels ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void FUNCC(OPNAME ## _pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel4*)(block )), AV_RN4P(pixels ));\
        OP(*((pixel4*)(block+4*sizeof(pixel))), AV_RN4P(pixels+4*sizeof(pixel)));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
/* full-pel copy needs no rounding, so no_rnd == rnd here */\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNCC(OPNAME ## _pixels8)(block, pixels, line_size, h);\
}\
\
static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                      int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1 ]);\
        b= AV_RN4P(&src2[i*src_stride2 ]);\
        OP(*((pixel4*)&dst[i*dst_stride ]), no_rnd_avg_pixel4(a, b));\
        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
    }\
}\
\
static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1 ]);\
        b= AV_RN4P(&src2[i*src_stride2 ]);\
        OP(*((pixel4*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), rnd_avg_pixel4(a, b));\
    }\
}\
\
static inline void FUNC(OPNAME ## _pixels4_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1 ]);\
        b= AV_RN4P(&src2[i*src_stride2 ]);\
        OP(*((pixel4*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
    }\
}\
\
static inline void FUNC(OPNAME ## _pixels2_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN2P(&src1[i*src_stride1 ]);\
        b= AV_RN2P(&src2[i*src_stride2 ]);\
        OP(*((pixel2*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
    }\
}\
\
static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    FUNC(OPNAME ## _pixels8_l2)(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
    FUNC(OPNAME ## _pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                       int src_stride1, int src_stride2, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
}\
\
/* half-pel variants: average with the 1-pixel (x2) or 1-line (y2) shifted source */\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    /* FIXME HIGH BIT DEPTH */\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        /* split each byte into low 2 bits (l*) and high 6 bits (h*) so four
           packed bytes can be averaged without cross-lane carries */\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    /* FIXME HIGH BIT DEPTH*/\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        /* same as _pixels8_l4 but with a 1-per-byte (not 2) rounding bias */\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    FUNC(OPNAME ## _pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *p_block, const uint8_t *p_pixels, int line_size, int h)\
{\
    int i, a0, b0, a1, b1;\
    pixel *block = (pixel*)p_block;\
    const pixel *pixels = (const pixel*)p_pixels;\
    /* byte stride -> pixel stride; valid for sizeof(pixel) of 1 or 2 */\
    line_size >>= sizeof(pixel)-1;\
    a0= pixels[0];\
    b0= pixels[1] + 2;\
    a0 += b0;\
    b0 += pixels[2];\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        a1= pixels[0];\
        b1= pixels[1];\
        a1 += b1;\
        b1 += pixels[2];\
\
        block[0]= (a1+a0)>>2; /* FIXME non put */\
        block[1]= (b1+b0)>>2;\
\
        pixels+=line_size;\
        block +=line_size;\
\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        block[0]= (a1+a0)>>2;\
        block[1]= (b1+b0)>>2;\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int i;\
    const uint32_t a= AV_RN32(pixels );\
    const uint32_t b= AV_RN32(pixels+1);\
    uint32_t l0= (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
    uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
               + ((b&0xFCFCFCFCUL)>>2);\
    uint32_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint32_t a= AV_RN32(pixels );\
        uint32_t b= AV_RN32(pixels+1);\
        l1= (a&0x03030303UL)\
          + (b&0x03030303UL);\
        h1= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN32(pixels );\
        b= AV_RN32(pixels+1);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int j;\
    /* process the 8-wide block as two 4-wide halves (j loop) */\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels );\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels );\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels );\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels );\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16)    , FUNCC(OPNAME ## _pixels8)    , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16)    , FUNCC(OPNAME ## _pixels8)    , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\

/* instantiate both the store (put) and average (avg) operator families */
#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
#define op_put(a, b) a = b

PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
  639. #define put_no_rnd_pixels8_c put_pixels8_c
  640. #define put_no_rnd_pixels16_c put_pixels16_c
  641. static void FUNCC(put_no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
  642. FUNC(put_no_rnd_pixels16_l2)(dst, a, b, stride, stride, stride, h);
  643. }
  644. static void FUNCC(put_no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
  645. FUNC(put_no_rnd_pixels8_l2)(dst, a, b, stride, stride, stride, h);
  646. }
/**
 * Instantiate the H.264 chroma motion-compensation functions (2-, 4- and
 * 8-pixel wide) for one operator.  x and y are the 1/8-pel fractional
 * offsets in 0..7; A, B, C, D are the bilinear weights of the four
 * neighbouring samples (A+B+C+D == 64), which OP() rounds and scales.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)p_dst;\
    pixel *src = (pixel*)p_src;\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    /* byte stride -> pixel stride; valid for sizeof(pixel) of 1 or 2 */\
    stride >>= sizeof(pixel)-1;\
\
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        /* x==0 or y==0: 2-D filter degenerates to a 1-D filter; 'step'
           selects the vertical or horizontal neighbour */\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)p_dst;\
    pixel *src = (pixel*)p_src;\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    stride >>= sizeof(pixel)-1;\
\
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)p_dst;\
    pixel *src = (pixel*)p_src;\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    stride >>= sizeof(pixel)-1;\
\
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}

/* +32 >> 6 rounds the 64-weighted sum back to sample range */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)
H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
  762. #define H264_LOWPASS(OPNAME, OP, OP2) \
  763. static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
  764. const int h=2;\
  765. INIT_CLIP\
  766. int i;\
  767. pixel *dst = (pixel*)p_dst;\
  768. pixel *src = (pixel*)p_src;\
  769. dstStride >>= sizeof(pixel)-1;\
  770. srcStride >>= sizeof(pixel)-1;\
  771. for(i=0; i<h; i++)\
  772. {\
  773. OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
  774. OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
  775. dst+=dstStride;\
  776. src+=srcStride;\
  777. }\
  778. }\
  779. \
  780. static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
  781. const int w=2;\
  782. INIT_CLIP\
  783. int i;\
  784. pixel *dst = (pixel*)p_dst;\
  785. pixel *src = (pixel*)p_src;\
  786. dstStride >>= sizeof(pixel)-1;\
  787. srcStride >>= sizeof(pixel)-1;\
  788. for(i=0; i<w; i++)\
  789. {\
  790. const int srcB= src[-2*srcStride];\
  791. const int srcA= src[-1*srcStride];\
  792. const int src0= src[0 *srcStride];\
  793. const int src1= src[1 *srcStride];\
  794. const int src2= src[2 *srcStride];\
  795. const int src3= src[3 *srcStride];\
  796. const int src4= src[4 *srcStride];\
  797. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  798. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  799. dst++;\
  800. src++;\
  801. }\
  802. }\
  803. \
  /* Center (half-H, half-V) 2x2 qpel: pass 1 filters horizontally into the\
   * unclipped intermediate buffer tmp (pixeltmp precision), pass 2 filters tmp\
   * vertically and normalizes with OP2 (+512>>10, i.e. two filter gains of 32).\
   * 'pad' is a constant bias added in pass 1 and subtracted again in pass 2, so\
   * it cancels exactly; presumably it keeps 10-bit intermediates inside the\
   * representable range of pixeltmp — TODO confirm against pixeltmp's width. */\
  804. static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *p_dst, pixeltmp *tmp, uint8_t *p_src, int dstStride, int tmpStride, int srcStride){\
  805. const int h=2;\
  806. const int w=2;\
  807. const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
  808. INIT_CLIP\
  809. int i;\
  810. pixel *dst = (pixel*)p_dst;\
  811. pixel *src = (pixel*)p_src;\
  /* byte strides -> pixel strides (tmpStride is already in pixeltmp units) */\
  812. dstStride >>= sizeof(pixel)-1;\
  813. srcStride >>= sizeof(pixel)-1;\
  /* start 2 rows above so pass 2 has vertical context for the first row */\
  814. src -= 2*srcStride;\
  815. for(i=0; i<h+5; i++)\
  816. {\
  817. tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
  818. tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
  819. tmp+=tmpStride;\
  820. src+=srcStride;\
  821. }\
  /* rewind tmp so row 0 of the output has 2 rows of context above it */\
  822. tmp -= tmpStride*(h+5-2);\
  823. for(i=0; i<w; i++)\
  824. {\
  825. const int tmpB= tmp[-2*tmpStride] - pad;\
  826. const int tmpA= tmp[-1*tmpStride] - pad;\
  827. const int tmp0= tmp[0 *tmpStride] - pad;\
  828. const int tmp1= tmp[1 *tmpStride] - pad;\
  829. const int tmp2= tmp[2 *tmpStride] - pad;\
  830. const int tmp3= tmp[3 *tmpStride] - pad;\
  831. const int tmp4= tmp[4 *tmpStride] - pad;\
  832. OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
  833. OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
  834. dst++;\
  835. tmp++;\
  836. }\
  837. }\
  /* Horizontal H.264 six-tap qpel lowpass, 4x4: per row, taps (1,-5,20,20,-5,1)\
   * over columns -2..+3 of each output pixel; OP normalizes (+16>>5) and clips. */\
  838. static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
  839. const int h=4;\
  840. INIT_CLIP\
  841. int i;\
  842. pixel *dst = (pixel*)p_dst;\
  843. pixel *src = (pixel*)p_src;\
  /* byte strides -> pixel strides */\
  844. dstStride >>= sizeof(pixel)-1;\
  845. srcStride >>= sizeof(pixel)-1;\
  846. for(i=0; i<h; i++)\
  847. {\
  848. OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
  849. OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
  850. OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
  851. OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
  852. dst+=dstStride;\
  853. src+=srcStride;\
  854. }\
  855. }\
  856. \
  /* Vertical H.264 six-tap qpel lowpass, 4x4: per column, taps (1,-5,20,20,-5,1)\
   * over rows -2..+3 of each output row; OP normalizes (+16>>5) and clips. */\
  857. static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
  858. const int w=4;\
  859. INIT_CLIP\
  860. int i;\
  861. pixel *dst = (pixel*)p_dst;\
  862. pixel *src = (pixel*)p_src;\
  /* byte strides -> pixel strides */\
  863. dstStride >>= sizeof(pixel)-1;\
  864. srcStride >>= sizeof(pixel)-1;\
  865. for(i=0; i<w; i++)\
  866. {\
  /* 9-row column window: 2 rows above, 4 output rows, 3 below */\
  867. const int srcB= src[-2*srcStride];\
  868. const int srcA= src[-1*srcStride];\
  869. const int src0= src[0 *srcStride];\
  870. const int src1= src[1 *srcStride];\
  871. const int src2= src[2 *srcStride];\
  872. const int src3= src[3 *srcStride];\
  873. const int src4= src[4 *srcStride];\
  874. const int src5= src[5 *srcStride];\
  875. const int src6= src[6 *srcStride];\
  876. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  877. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  878. OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
  879. OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
  880. dst++;\
  881. src++;\
  882. }\
  883. }\
  884. \
  /* Center (half-H, half-V) 4x4 qpel: horizontal pass into unclipped tmp,\
   * then vertical pass normalized by OP2 (+512>>10). 'pad' is a bias added in\
   * pass 1 and removed in pass 2 (net zero); presumably it keeps 10-bit\
   * intermediates inside pixeltmp's range — TODO confirm. */\
  885. static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *p_dst, pixeltmp *tmp, uint8_t *p_src, int dstStride, int tmpStride, int srcStride){\
  886. const int h=4;\
  887. const int w=4;\
  888. const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
  889. INIT_CLIP\
  890. int i;\
  891. pixel *dst = (pixel*)p_dst;\
  892. pixel *src = (pixel*)p_src;\
  /* byte strides -> pixel strides; tmpStride is in pixeltmp units */\
  893. dstStride >>= sizeof(pixel)-1;\
  894. srcStride >>= sizeof(pixel)-1;\
  /* start 2 rows above: pass 2 needs vertical context for the first row */\
  895. src -= 2*srcStride;\
  896. for(i=0; i<h+5; i++)\
  897. {\
  898. tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
  899. tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
  900. tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]) + pad;\
  901. tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]) + pad;\
  902. tmp+=tmpStride;\
  903. src+=srcStride;\
  904. }\
  /* rewind tmp so the first output row keeps 2 rows of context above it */\
  905. tmp -= tmpStride*(h+5-2);\
  906. for(i=0; i<w; i++)\
  907. {\
  908. const int tmpB= tmp[-2*tmpStride] - pad;\
  909. const int tmpA= tmp[-1*tmpStride] - pad;\
  910. const int tmp0= tmp[0 *tmpStride] - pad;\
  911. const int tmp1= tmp[1 *tmpStride] - pad;\
  912. const int tmp2= tmp[2 *tmpStride] - pad;\
  913. const int tmp3= tmp[3 *tmpStride] - pad;\
  914. const int tmp4= tmp[4 *tmpStride] - pad;\
  915. const int tmp5= tmp[5 *tmpStride] - pad;\
  916. const int tmp6= tmp[6 *tmpStride] - pad;\
  917. OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
  918. OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
  919. OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
  920. OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
  921. dst++;\
  922. tmp++;\
  923. }\
  924. }\
  925. \
  /* Horizontal H.264 six-tap qpel lowpass, 8x8: fully unrolled row of 8 taps\
   * (1,-5,20,20,-5,1); OP normalizes (+16>>5), clips, and optionally averages. */\
  926. static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
  927. const int h=8;\
  928. INIT_CLIP\
  929. int i;\
  930. pixel *dst = (pixel*)p_dst;\
  931. pixel *src = (pixel*)p_src;\
  /* byte strides -> pixel strides */\
  932. dstStride >>= sizeof(pixel)-1;\
  933. srcStride >>= sizeof(pixel)-1;\
  934. for(i=0; i<h; i++)\
  935. {\
  936. OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
  937. OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
  938. OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
  939. OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
  940. OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
  941. OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
  942. OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
  943. OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
  944. dst+=dstStride;\
  945. src+=srcStride;\
  946. }\
  947. }\
  948. \
  /* Vertical H.264 six-tap qpel lowpass, 8x8: per column, taps (1,-5,20,20,-5,1)\
   * over rows -2..+3 of each output row; OP normalizes (+16>>5) and clips. */\
  949. static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
  950. const int w=8;\
  951. INIT_CLIP\
  952. int i;\
  953. pixel *dst = (pixel*)p_dst;\
  954. pixel *src = (pixel*)p_src;\
  /* byte strides -> pixel strides */\
  955. dstStride >>= sizeof(pixel)-1;\
  956. srcStride >>= sizeof(pixel)-1;\
  957. for(i=0; i<w; i++)\
  958. {\
  /* 13-row column window: 2 rows above, 8 output rows, 3 below */\
  959. const int srcB= src[-2*srcStride];\
  960. const int srcA= src[-1*srcStride];\
  961. const int src0= src[0 *srcStride];\
  962. const int src1= src[1 *srcStride];\
  963. const int src2= src[2 *srcStride];\
  964. const int src3= src[3 *srcStride];\
  965. const int src4= src[4 *srcStride];\
  966. const int src5= src[5 *srcStride];\
  967. const int src6= src[6 *srcStride];\
  968. const int src7= src[7 *srcStride];\
  969. const int src8= src[8 *srcStride];\
  970. const int src9= src[9 *srcStride];\
  971. const int src10=src[10*srcStride];\
  972. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  973. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  974. OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
  975. OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
  976. OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
  977. OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
  978. OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
  979. OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
  980. dst++;\
  981. src++;\
  982. }\
  983. }\
  984. \
  /* Center (half-H, half-V) 8x8 qpel: horizontal pass into unclipped tmp,\
   * then vertical pass normalized by OP2 (+512>>10, two filter gains of 32).\
   * 'pad' is added in pass 1 and subtracted in pass 2, cancelling exactly;\
   * presumably a range bias so 10-bit intermediates fit pixeltmp — TODO confirm. */\
  985. static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *p_dst, pixeltmp *tmp, uint8_t *p_src, int dstStride, int tmpStride, int srcStride){\
  986. const int h=8;\
  987. const int w=8;\
  988. const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
  989. INIT_CLIP\
  990. int i;\
  991. pixel *dst = (pixel*)p_dst;\
  992. pixel *src = (pixel*)p_src;\
  /* byte strides -> pixel strides; tmpStride is in pixeltmp units */\
  993. dstStride >>= sizeof(pixel)-1;\
  994. srcStride >>= sizeof(pixel)-1;\
  /* start 2 rows above: vertical pass needs context for the first output row */\
  995. src -= 2*srcStride;\
  996. for(i=0; i<h+5; i++)\
  997. {\
  998. tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]) + pad;\
  999. tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]) + pad;\
  1000. tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]) + pad;\
  1001. tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]) + pad;\
  1002. tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]) + pad;\
  1003. tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]) + pad;\
  1004. tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]) + pad;\
  1005. tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]) + pad;\
  1006. tmp+=tmpStride;\
  1007. src+=srcStride;\
  1008. }\
  /* rewind tmp so the first output row keeps 2 rows of context above it */\
  1009. tmp -= tmpStride*(h+5-2);\
  1010. for(i=0; i<w; i++)\
  1011. {\
  1012. const int tmpB= tmp[-2*tmpStride] - pad;\
  1013. const int tmpA= tmp[-1*tmpStride] - pad;\
  1014. const int tmp0= tmp[0 *tmpStride] - pad;\
  1015. const int tmp1= tmp[1 *tmpStride] - pad;\
  1016. const int tmp2= tmp[2 *tmpStride] - pad;\
  1017. const int tmp3= tmp[3 *tmpStride] - pad;\
  1018. const int tmp4= tmp[4 *tmpStride] - pad;\
  1019. const int tmp5= tmp[5 *tmpStride] - pad;\
  1020. const int tmp6= tmp[6 *tmpStride] - pad;\
  1021. const int tmp7= tmp[7 *tmpStride] - pad;\
  1022. const int tmp8= tmp[8 *tmpStride] - pad;\
  1023. const int tmp9= tmp[9 *tmpStride] - pad;\
  1024. const int tmp10=tmp[10*tmpStride] - pad;\
  1025. OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
  1026. OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
  1027. OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
  1028. OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
  1029. OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
  1030. OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
  1031. OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
  1032. OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
  1033. dst++;\
  1034. tmp++;\
  1035. }\
  1036. }\
  1037. \
  /* 16x16 vertical lowpass composed of four 8x8 calls: top-left, top-right,\
   * then advance 8 rows for bottom-left, bottom-right. Strides are still in\
   * bytes at this level (the 8x8 helper converts), hence +8*sizeof(pixel) to\
   * step 8 pixels to the right. */\
  1038. static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
  1039. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
  1040. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  1041. src += 8*srcStride;\
  1042. dst += 8*dstStride;\
  1043. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
  1044. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  1045. }\
  1046. \
  /* 16x16 horizontal lowpass composed of four 8x8 calls (left/right halves,\
   * then the lower 8 rows). Byte strides; +8*sizeof(pixel) = 8 pixels right. */\
  1047. static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
  1048. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
  1049. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  1050. src += 8*srcStride;\
  1051. dst += 8*dstStride;\
  1052. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
  1053. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  1054. }\
  1055. \
  /* 16x16 center (half-H, half-V) lowpass composed of four 8x8 calls; tmp+8\
   * offsets the intermediate buffer by 8 pixeltmp columns for the right half. */\
  1056. static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, pixeltmp *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
  1057. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
  1058. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
  1059. src += 8*srcStride;\
  1060. dst += 8*dstStride;\
  1061. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
  1062. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
  1063. }\
/* H264_MC(OPNAME, SIZE) expands to the 16 quarter-pel motion-compensation
 * entry points OPNAME h264_qpel SIZE _mcXY, where X/Y (0..3) are the
 * quarter-pel offsets in x/y. Half-pel positions call the lowpass helpers
 * above directly; quarter positions average two half/full-pel results with
 * pixels_l2(). copy_block builds 'full', a contiguous SIZE x (SIZE+5) copy
 * starting 2 rows above src, so the vertical filter has top/bottom context
 * at a fixed stride; full_mid points at its row 2 (the block origin). */
  1064. #define H264_MC(OPNAME, SIZE) \
  /* (0,0): full-pel — plain SIZE x SIZE copy/average */\
  1065. static av_unused void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, uint8_t *src, int stride){\
  1066. FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\
  1067. }\
  1068. \
  /* (1,0): average of full-pel src and the horizontal half-pel */\
  1069. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, uint8_t *src, int stride){\
  1070. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1071. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
  1072. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\
  1073. }\
  1074. \
  /* (2,0): horizontal half-pel, written directly */\
  1075. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, uint8_t *src, int stride){\
  1076. FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\
  1077. }\
  1078. \
  /* (3,0): like mc10 but averaged with the pixel one to the right */\
  1079. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, uint8_t *src, int stride){\
  1080. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1081. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
  1082. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\
  1083. }\
  1084. \
  /* (0,1): average of full-pel (from the padded copy) and vertical half-pel */\
  1085. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *src, int stride){\
  1086. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1087. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1088. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1089. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1090. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1091. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1092. }\
  1093. \
  /* (0,2): vertical half-pel, written directly */\
  1094. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, uint8_t *src, int stride){\
  1095. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1096. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1097. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1098. FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\
  1099. }\
  1100. \
  /* (0,3): like mc01 but averaged with the pixel one row below */\
  1101. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *src, int stride){\
  1102. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1103. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1104. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1105. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1106. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1107. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1108. }\
  1109. \
  /* (1,1),(3,1),(1,3),(3,3): diagonal quarters = average of a horizontal\
   * half-pel (taken at row 0 or row 1) and a vertical half-pel (taken at\
   * column 0 or column 1). */\
  1110. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *src, int stride){\
  1111. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1112. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1113. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1114. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1115. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
  1116. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1117. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1118. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1119. }\
  1120. \
  1121. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *src, int stride){\
  1122. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1123. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1124. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1125. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1126. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
  1127. FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
  1128. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1129. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1130. }\
  1131. \
  1132. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *src, int stride){\
  1133. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1134. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1135. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1136. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1137. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
  1138. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1139. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1140. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1141. }\
  1142. \
  1143. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *src, int stride){\
  1144. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1145. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1146. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1147. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1148. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
  1149. FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
  1150. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1151. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1152. }\
  1153. \
  /* (2,2): center position — direct two-pass hv lowpass */\
  1154. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, uint8_t *src, int stride){\
  1155. pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1156. FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
  1157. }\
  1158. \
  /* (2,1)/(2,3): average of a horizontal half-pel (row 0 / row 1) and the\
   * center hv result */\
  1159. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *src, int stride){\
  1160. pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1161. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1162. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1163. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
  1164. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1165. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1166. }\
  1167. \
  1168. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *src, int stride){\
  1169. pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1170. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1171. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1172. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
  1173. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1174. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1175. }\
  1176. \
  /* (1,2)/(3,2): average of a vertical half-pel (col 0 / col 1) and the\
   * center hv result */\
  1177. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *src, int stride){\
  1178. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1179. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1180. pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1181. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1182. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1183. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1184. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1185. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1186. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1187. }\
  1188. \
  1189. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, uint8_t *src, int stride){\
  1190. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1191. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1192. pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1193. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1194. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1195. FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
  1196. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1197. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1198. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1199. }\
/* Output ops plugged into the templates above:
 *  op_put : one 6-tap pass has gain 32 -> round (+16), shift 5, clip.
 *  op2_put: two passes (hv) have gain 1024 -> round (+512), shift 10, clip.
 *  *_avg  : same normalization, then rounding average with the existing dst. */
  1200. #define op_avg(a, b) a = (((a)+CLIP(((b) + 16)>>5)+1)>>1)
  1201. //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
  1202. #define op_put(a, b) a = CLIP(((b) + 16)>>5)
  1203. #define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
  1204. #define op2_put(a, b) a = CLIP(((b) + 512)>>10)
/* Instantiate put/avg flavors of every lowpass helper and MC entry point.
 * Note: only put_ is instantiated for SIZE 2 (no H264_MC(avg_, 2) below). */
  1205. H264_LOWPASS(put_ , op_put, op2_put)
  1206. H264_LOWPASS(avg_ , op_avg, op2_avg)
  1207. H264_MC(put_, 2)
  1208. H264_MC(put_, 4)
  1209. H264_MC(put_, 8)
  1210. H264_MC(put_, 16)
  1211. H264_MC(avg_, 4)
  1212. H264_MC(avg_, 8)
  1213. H264_MC(avg_, 16)
  1214. #undef op_avg
  1215. #undef op_put
  1216. #undef op2_avg
  1217. #undef op2_put
/* The full-pel (mc00) 8x8/16x16 cases are plain copies/averages, so alias
 * them per bit depth to the shared ff_*_pixels wrappers defined below. */
  1218. #if BIT_DEPTH == 8
  1219. # define put_h264_qpel8_mc00_8_c ff_put_pixels8x8_8_c
  1220. # define avg_h264_qpel8_mc00_8_c ff_avg_pixels8x8_8_c
  1221. # define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
  1222. # define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
  1223. #elif BIT_DEPTH == 9
  1224. # define put_h264_qpel8_mc00_9_c ff_put_pixels8x8_9_c
  1225. # define avg_h264_qpel8_mc00_9_c ff_avg_pixels8x8_9_c
  1226. # define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
  1227. # define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
  1228. #elif BIT_DEPTH == 10
  1229. # define put_h264_qpel8_mc00_10_c ff_put_pixels8x8_10_c
  1230. # define avg_h264_qpel8_mc00_10_c ff_avg_pixels8x8_10_c
  1231. # define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
  1232. # define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
  1233. #elif BIT_DEPTH == 12
  1234. # define put_h264_qpel8_mc00_12_c ff_put_pixels8x8_12_c
  1235. # define avg_h264_qpel8_mc00_12_c ff_avg_pixels8x8_12_c
  1236. # define put_h264_qpel16_mc00_12_c ff_put_pixels16x16_12_c
  1237. # define avg_h264_qpel16_mc00_12_c ff_avg_pixels16x16_12_c
  1238. #elif BIT_DEPTH == 14
  1239. # define put_h264_qpel8_mc00_14_c ff_put_pixels8x8_14_c
  1240. # define avg_h264_qpel8_mc00_14_c ff_avg_pixels8x8_14_c
  1241. # define put_h264_qpel16_mc00_14_c ff_put_pixels16x16_14_c
  1242. # define avg_h264_qpel16_mc00_14_c ff_avg_pixels16x16_14_c
  1243. #endif
/* Fixed-size full-pel entry point: forwards to put_pixels8 with height 8. */
  1244. void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
  1245. FUNCC(put_pixels8)(dst, src, stride, 8);
  1246. }
/* Fixed-size full-pel entry point: forwards to avg_pixels8 with height 8. */
  1247. void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
  1248. FUNCC(avg_pixels8)(dst, src, stride, 8);
  1249. }
/* Fixed-size full-pel entry point: forwards to put_pixels16 with height 16. */
  1250. void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
  1251. FUNCC(put_pixels16)(dst, src, stride, 16);
  1252. }
/* Fixed-size full-pel entry point: forwards to avg_pixels16 with height 16. */
  1253. void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
  1254. FUNCC(avg_pixels16)(dst, src, stride, 16);
  1255. }