You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

597 lines
25KB

  1. /*
  2. * DSP utils
  3. * Copyright (c) 2000, 2001 Fabrice Bellard
  4. * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  5. *
  6. * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
  7. *
  8. * This file is part of FFmpeg.
  9. *
  10. * FFmpeg is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU Lesser General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2.1 of the License, or (at your option) any later version.
  14. *
  15. * FFmpeg is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * Lesser General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Lesser General Public
  21. * License along with FFmpeg; if not, write to the Free Software
  22. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23. */
  24. /**
  25. * @file
  26. * DSP utils
  27. */
  28. #include "bit_depth_template.c"
  29. /* draw the edges of width 'w' of an image of size width, height */
  30. //FIXME check that this is ok for mpeg4 interlaced
  31. static void FUNCC(draw_edges)(uint8_t *p_buf, int p_wrap, int width, int height, int w, int h, int sides)
  32. {
  33. pixel *buf = (pixel*)p_buf;
  34. int wrap = p_wrap / sizeof(pixel);
  35. pixel *ptr, *last_line;
  36. int i;
  37. /* left and right */
  38. ptr = buf;
  39. for(i=0;i<height;i++) {
  40. #if BIT_DEPTH > 8
  41. int j;
  42. for (j = 0; j < w; j++) {
  43. ptr[j-w] = ptr[0];
  44. ptr[j+width] = ptr[width-1];
  45. }
  46. #else
  47. memset(ptr - w, ptr[0], w);
  48. memset(ptr + width, ptr[width-1], w);
  49. #endif
  50. ptr += wrap;
  51. }
  52. /* top and bottom + corners */
  53. buf -= w;
  54. last_line = buf + (height - 1) * wrap;
  55. if (sides & EDGE_TOP)
  56. for(i = 0; i < h; i++)
  57. memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
  58. if (sides & EDGE_BOTTOM)
  59. for (i = 0; i < h; i++)
  60. memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
  61. }
  62. #define DCTELEM_FUNCS(dctcoef, suffix) \
  63. static void FUNCC(get_pixels ## suffix)(int16_t *av_restrict _block, \
  64. const uint8_t *_pixels, \
  65. int line_size) \
  66. { \
  67. const pixel *pixels = (const pixel *) _pixels; \
  68. dctcoef *av_restrict block = (dctcoef *) _block; \
  69. int i; \
  70. \
  71. /* read the pixels */ \
  72. for(i=0;i<8;i++) { \
  73. block[0] = pixels[0]; \
  74. block[1] = pixels[1]; \
  75. block[2] = pixels[2]; \
  76. block[3] = pixels[3]; \
  77. block[4] = pixels[4]; \
  78. block[5] = pixels[5]; \
  79. block[6] = pixels[6]; \
  80. block[7] = pixels[7]; \
  81. pixels += line_size / sizeof(pixel); \
  82. block += 8; \
  83. } \
  84. } \
  85. \
  86. static void FUNCC(add_pixels8 ## suffix)(uint8_t *av_restrict _pixels, \
  87. int16_t *_block, \
  88. int line_size) \
  89. { \
  90. int i; \
  91. pixel *av_restrict pixels = (pixel *av_restrict)_pixels; \
  92. dctcoef *block = (dctcoef*)_block; \
  93. line_size /= sizeof(pixel); \
  94. \
  95. for(i=0;i<8;i++) { \
  96. pixels[0] += block[0]; \
  97. pixels[1] += block[1]; \
  98. pixels[2] += block[2]; \
  99. pixels[3] += block[3]; \
  100. pixels[4] += block[4]; \
  101. pixels[5] += block[5]; \
  102. pixels[6] += block[6]; \
  103. pixels[7] += block[7]; \
  104. pixels += line_size; \
  105. block += 8; \
  106. } \
  107. } \
  108. \
  109. static void FUNCC(add_pixels4 ## suffix)(uint8_t *av_restrict _pixels, \
  110. int16_t *_block, \
  111. int line_size) \
  112. { \
  113. int i; \
  114. pixel *av_restrict pixels = (pixel *av_restrict)_pixels; \
  115. dctcoef *block = (dctcoef*)_block; \
  116. line_size /= sizeof(pixel); \
  117. \
  118. for(i=0;i<4;i++) { \
  119. pixels[0] += block[0]; \
  120. pixels[1] += block[1]; \
  121. pixels[2] += block[2]; \
  122. pixels[3] += block[3]; \
  123. pixels += line_size; \
  124. block += 4; \
  125. } \
  126. } \
  127. \
  128. static void FUNCC(clear_block ## suffix)(int16_t *block) \
  129. { \
  130. memset(block, 0, sizeof(dctcoef)*64); \
  131. } \
  132. \
  133. /** \
  134. * memset(blocks, 0, sizeof(int16_t)*6*64) \
  135. */ \
  136. static void FUNCC(clear_blocks ## suffix)(int16_t *blocks) \
  137. { \
  138. memset(blocks, 0, sizeof(dctcoef)*6*64); \
  139. }
  140. DCTELEM_FUNCS(int16_t, _16)
  141. #if BIT_DEPTH > 8
  142. DCTELEM_FUNCS(dctcoef, _32)
  143. #endif
  144. #include "hpel_template.c"
  145. #define PIXOP2(OPNAME, OP) \
  146. static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
  147. int src_stride1, int src_stride2, int h){\
  148. int i;\
  149. for(i=0; i<h; i++){\
  150. pixel4 a,b;\
  151. a= AV_RN4P(&src1[i*src_stride1 ]);\
  152. b= AV_RN4P(&src2[i*src_stride2 ]);\
  153. OP(*((pixel4*)&dst[i*dst_stride ]), no_rnd_avg_pixel4(a, b));\
  154. a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
  155. b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
  156. OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
  157. }\
  158. }\
  159. \
  160. static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
  161. int src_stride1, int src_stride2, int h){\
  162. FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
  163. FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
  164. }\
  165. \
  166. static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  167. FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
  168. }\
  169. \
  170. static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  171. FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
  172. }\
  173. \
  174. static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  175. FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
  176. }\
  177. \
  178. static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  179. FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
  180. }\
  181. \
  182. static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
  183. int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
  184. /* FIXME HIGH BIT DEPTH */\
  185. int i;\
  186. for(i=0; i<h; i++){\
  187. uint32_t a, b, c, d, l0, l1, h0, h1;\
  188. a= AV_RN32(&src1[i*src_stride1]);\
  189. b= AV_RN32(&src2[i*src_stride2]);\
  190. c= AV_RN32(&src3[i*src_stride3]);\
  191. d= AV_RN32(&src4[i*src_stride4]);\
  192. l0= (a&0x03030303UL)\
  193. + (b&0x03030303UL)\
  194. + 0x02020202UL;\
  195. h0= ((a&0xFCFCFCFCUL)>>2)\
  196. + ((b&0xFCFCFCFCUL)>>2);\
  197. l1= (c&0x03030303UL)\
  198. + (d&0x03030303UL);\
  199. h1= ((c&0xFCFCFCFCUL)>>2)\
  200. + ((d&0xFCFCFCFCUL)>>2);\
  201. OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  202. a= AV_RN32(&src1[i*src_stride1+4]);\
  203. b= AV_RN32(&src2[i*src_stride2+4]);\
  204. c= AV_RN32(&src3[i*src_stride3+4]);\
  205. d= AV_RN32(&src4[i*src_stride4+4]);\
  206. l0= (a&0x03030303UL)\
  207. + (b&0x03030303UL)\
  208. + 0x02020202UL;\
  209. h0= ((a&0xFCFCFCFCUL)>>2)\
  210. + ((b&0xFCFCFCFCUL)>>2);\
  211. l1= (c&0x03030303UL)\
  212. + (d&0x03030303UL);\
  213. h1= ((c&0xFCFCFCFCUL)>>2)\
  214. + ((d&0xFCFCFCFCUL)>>2);\
  215. OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  216. }\
  217. }\
  218. \
  219. static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  220. FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
  221. }\
  222. \
  223. static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  224. FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
  225. }\
  226. \
  227. static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  228. FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
  229. }\
  230. \
  231. static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  232. FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
  233. }\
  234. \
  235. static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
  236. int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
  237. /* FIXME HIGH BIT DEPTH*/\
  238. int i;\
  239. for(i=0; i<h; i++){\
  240. uint32_t a, b, c, d, l0, l1, h0, h1;\
  241. a= AV_RN32(&src1[i*src_stride1]);\
  242. b= AV_RN32(&src2[i*src_stride2]);\
  243. c= AV_RN32(&src3[i*src_stride3]);\
  244. d= AV_RN32(&src4[i*src_stride4]);\
  245. l0= (a&0x03030303UL)\
  246. + (b&0x03030303UL)\
  247. + 0x01010101UL;\
  248. h0= ((a&0xFCFCFCFCUL)>>2)\
  249. + ((b&0xFCFCFCFCUL)>>2);\
  250. l1= (c&0x03030303UL)\
  251. + (d&0x03030303UL);\
  252. h1= ((c&0xFCFCFCFCUL)>>2)\
  253. + ((d&0xFCFCFCFCUL)>>2);\
  254. OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  255. a= AV_RN32(&src1[i*src_stride1+4]);\
  256. b= AV_RN32(&src2[i*src_stride2+4]);\
  257. c= AV_RN32(&src3[i*src_stride3+4]);\
  258. d= AV_RN32(&src4[i*src_stride4+4]);\
  259. l0= (a&0x03030303UL)\
  260. + (b&0x03030303UL)\
  261. + 0x01010101UL;\
  262. h0= ((a&0xFCFCFCFCUL)>>2)\
  263. + ((b&0xFCFCFCFCUL)>>2);\
  264. l1= (c&0x03030303UL)\
  265. + (d&0x03030303UL);\
  266. h1= ((c&0xFCFCFCFCUL)>>2)\
  267. + ((d&0xFCFCFCFCUL)>>2);\
  268. OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  269. }\
  270. }\
  271. static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
  272. int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
  273. FUNC(OPNAME ## _pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
  274. FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
  275. }\
  276. static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
  277. int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
  278. FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
  279. FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
  280. }\
  281. \
  282. static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, ptrdiff_t line_size, int h)\
  283. {\
  284. int i, a0, b0, a1, b1;\
  285. pixel *block = (pixel*)_block;\
  286. const pixel *pixels = (const pixel*)_pixels;\
  287. line_size >>= sizeof(pixel)-1;\
  288. a0= pixels[0];\
  289. b0= pixels[1] + 2;\
  290. a0 += b0;\
  291. b0 += pixels[2];\
  292. \
  293. pixels+=line_size;\
  294. for(i=0; i<h; i+=2){\
  295. a1= pixels[0];\
  296. b1= pixels[1];\
  297. a1 += b1;\
  298. b1 += pixels[2];\
  299. \
  300. block[0]= (a1+a0)>>2; /* FIXME non put */\
  301. block[1]= (b1+b0)>>2;\
  302. \
  303. pixels+=line_size;\
  304. block +=line_size;\
  305. \
  306. a0= pixels[0];\
  307. b0= pixels[1] + 2;\
  308. a0 += b0;\
  309. b0 += pixels[2];\
  310. \
  311. block[0]= (a1+a0)>>2;\
  312. block[1]= (b1+b0)>>2;\
  313. pixels+=line_size;\
  314. block +=line_size;\
  315. }\
  316. }\
  317. \
  318. static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)\
  319. {\
  320. /* FIXME HIGH BIT DEPTH */\
  321. int i;\
  322. const uint32_t a= AV_RN32(pixels );\
  323. const uint32_t b= AV_RN32(pixels+1);\
  324. uint32_t l0= (a&0x03030303UL)\
  325. + (b&0x03030303UL)\
  326. + 0x02020202UL;\
  327. uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
  328. + ((b&0xFCFCFCFCUL)>>2);\
  329. uint32_t l1,h1;\
  330. \
  331. pixels+=line_size;\
  332. for(i=0; i<h; i+=2){\
  333. uint32_t a= AV_RN32(pixels );\
  334. uint32_t b= AV_RN32(pixels+1);\
  335. l1= (a&0x03030303UL)\
  336. + (b&0x03030303UL);\
  337. h1= ((a&0xFCFCFCFCUL)>>2)\
  338. + ((b&0xFCFCFCFCUL)>>2);\
  339. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  340. pixels+=line_size;\
  341. block +=line_size;\
  342. a= AV_RN32(pixels );\
  343. b= AV_RN32(pixels+1);\
  344. l0= (a&0x03030303UL)\
  345. + (b&0x03030303UL)\
  346. + 0x02020202UL;\
  347. h0= ((a&0xFCFCFCFCUL)>>2)\
  348. + ((b&0xFCFCFCFCUL)>>2);\
  349. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  350. pixels+=line_size;\
  351. block +=line_size;\
  352. }\
  353. }\
  354. \
  355. static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)\
  356. {\
  357. /* FIXME HIGH BIT DEPTH */\
  358. int j;\
  359. for(j=0; j<2; j++){\
  360. int i;\
  361. const uint32_t a= AV_RN32(pixels );\
  362. const uint32_t b= AV_RN32(pixels+1);\
  363. uint32_t l0= (a&0x03030303UL)\
  364. + (b&0x03030303UL)\
  365. + 0x02020202UL;\
  366. uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
  367. + ((b&0xFCFCFCFCUL)>>2);\
  368. uint32_t l1,h1;\
  369. \
  370. pixels+=line_size;\
  371. for(i=0; i<h; i+=2){\
  372. uint32_t a= AV_RN32(pixels );\
  373. uint32_t b= AV_RN32(pixels+1);\
  374. l1= (a&0x03030303UL)\
  375. + (b&0x03030303UL);\
  376. h1= ((a&0xFCFCFCFCUL)>>2)\
  377. + ((b&0xFCFCFCFCUL)>>2);\
  378. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  379. pixels+=line_size;\
  380. block +=line_size;\
  381. a= AV_RN32(pixels );\
  382. b= AV_RN32(pixels+1);\
  383. l0= (a&0x03030303UL)\
  384. + (b&0x03030303UL)\
  385. + 0x02020202UL;\
  386. h0= ((a&0xFCFCFCFCUL)>>2)\
  387. + ((b&0xFCFCFCFCUL)>>2);\
  388. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  389. pixels+=line_size;\
  390. block +=line_size;\
  391. }\
  392. pixels+=4-line_size*(h+1);\
  393. block +=4-line_size*h;\
  394. }\
  395. }\
  396. \
  397. static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)\
  398. {\
  399. /* FIXME HIGH BIT DEPTH */\
  400. int j;\
  401. for(j=0; j<2; j++){\
  402. int i;\
  403. const uint32_t a= AV_RN32(pixels );\
  404. const uint32_t b= AV_RN32(pixels+1);\
  405. uint32_t l0= (a&0x03030303UL)\
  406. + (b&0x03030303UL)\
  407. + 0x01010101UL;\
  408. uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
  409. + ((b&0xFCFCFCFCUL)>>2);\
  410. uint32_t l1,h1;\
  411. \
  412. pixels+=line_size;\
  413. for(i=0; i<h; i+=2){\
  414. uint32_t a= AV_RN32(pixels );\
  415. uint32_t b= AV_RN32(pixels+1);\
  416. l1= (a&0x03030303UL)\
  417. + (b&0x03030303UL);\
  418. h1= ((a&0xFCFCFCFCUL)>>2)\
  419. + ((b&0xFCFCFCFCUL)>>2);\
  420. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  421. pixels+=line_size;\
  422. block +=line_size;\
  423. a= AV_RN32(pixels );\
  424. b= AV_RN32(pixels+1);\
  425. l0= (a&0x03030303UL)\
  426. + (b&0x03030303UL)\
  427. + 0x01010101UL;\
  428. h0= ((a&0xFCFCFCFCUL)>>2)\
  429. + ((b&0xFCFCFCFCUL)>>2);\
  430. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  431. pixels+=line_size;\
  432. block +=line_size;\
  433. }\
  434. pixels+=4-line_size*(h+1);\
  435. block +=4-line_size*h;\
  436. }\
  437. }\
  438. \
  439. CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
  440. CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
  441. CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
  442. av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
  443. CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
  444. CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
  445. CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\
  446. #define op_avg(a, b) a = rnd_avg_pixel4(a, b)
  447. #define op_put(a, b) a = b
  448. #if BIT_DEPTH == 8
  449. #define put_no_rnd_pixels8_8_c put_pixels8_8_c
  450. PIXOP2(avg, op_avg)
  451. PIXOP2(put, op_put)
  452. #endif
  453. #undef op_avg
  454. #undef op_put
  455. #define H264_CHROMA_MC(OPNAME, OP)\
  456. static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
  457. pixel *dst = (pixel*)p_dst;\
  458. pixel *src = (pixel*)p_src;\
  459. const int A=(8-x)*(8-y);\
  460. const int B=( x)*(8-y);\
  461. const int C=(8-x)*( y);\
  462. const int D=( x)*( y);\
  463. int i;\
  464. stride >>= sizeof(pixel)-1;\
  465. \
  466. av_assert2(x<8 && y<8 && x>=0 && y>=0);\
  467. \
  468. if(D){\
  469. for(i=0; i<h; i++){\
  470. OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
  471. OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
  472. dst+= stride;\
  473. src+= stride;\
  474. }\
  475. }else{\
  476. const int E= B+C;\
  477. const int step= C ? stride : 1;\
  478. for(i=0; i<h; i++){\
  479. OP(dst[0], (A*src[0] + E*src[step+0]));\
  480. OP(dst[1], (A*src[1] + E*src[step+1]));\
  481. dst+= stride;\
  482. src+= stride;\
  483. }\
  484. }\
  485. }\
  486. \
  487. static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
  488. pixel *dst = (pixel*)p_dst;\
  489. pixel *src = (pixel*)p_src;\
  490. const int A=(8-x)*(8-y);\
  491. const int B=( x)*(8-y);\
  492. const int C=(8-x)*( y);\
  493. const int D=( x)*( y);\
  494. int i;\
  495. stride >>= sizeof(pixel)-1;\
  496. \
  497. av_assert2(x<8 && y<8 && x>=0 && y>=0);\
  498. \
  499. if(D){\
  500. for(i=0; i<h; i++){\
  501. OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
  502. OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
  503. OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
  504. OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
  505. dst+= stride;\
  506. src+= stride;\
  507. }\
  508. }else{\
  509. const int E= B+C;\
  510. const int step= C ? stride : 1;\
  511. for(i=0; i<h; i++){\
  512. OP(dst[0], (A*src[0] + E*src[step+0]));\
  513. OP(dst[1], (A*src[1] + E*src[step+1]));\
  514. OP(dst[2], (A*src[2] + E*src[step+2]));\
  515. OP(dst[3], (A*src[3] + E*src[step+3]));\
  516. dst+= stride;\
  517. src+= stride;\
  518. }\
  519. }\
  520. }\
  521. \
  522. static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
  523. pixel *dst = (pixel*)p_dst;\
  524. pixel *src = (pixel*)p_src;\
  525. const int A=(8-x)*(8-y);\
  526. const int B=( x)*(8-y);\
  527. const int C=(8-x)*( y);\
  528. const int D=( x)*( y);\
  529. int i;\
  530. stride >>= sizeof(pixel)-1;\
  531. \
  532. av_assert2(x<8 && y<8 && x>=0 && y>=0);\
  533. \
  534. if(D){\
  535. for(i=0; i<h; i++){\
  536. OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
  537. OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
  538. OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
  539. OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
  540. OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
  541. OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
  542. OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
  543. OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
  544. dst+= stride;\
  545. src+= stride;\
  546. }\
  547. }else{\
  548. const int E= B+C;\
  549. const int step= C ? stride : 1;\
  550. for(i=0; i<h; i++){\
  551. OP(dst[0], (A*src[0] + E*src[step+0]));\
  552. OP(dst[1], (A*src[1] + E*src[step+1]));\
  553. OP(dst[2], (A*src[2] + E*src[step+2]));\
  554. OP(dst[3], (A*src[3] + E*src[step+3]));\
  555. OP(dst[4], (A*src[4] + E*src[step+4]));\
  556. OP(dst[5], (A*src[5] + E*src[step+5]));\
  557. OP(dst[6], (A*src[6] + E*src[step+6]));\
  558. OP(dst[7], (A*src[7] + E*src[step+7]));\
  559. dst+= stride;\
  560. src+= stride;\
  561. }\
  562. }\
  563. }
  564. #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
  565. #define op_put(a, b) a = (((b) + 32)>>6)
  566. H264_CHROMA_MC(put_ , op_put)
  567. H264_CHROMA_MC(avg_ , op_avg)
  568. #undef op_avg
  569. #undef op_put
  570. void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
  571. FUNCC(put_pixels8)(dst, src, stride, 8);
  572. }
  573. void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
  574. FUNCC(avg_pixels8)(dst, src, stride, 8);
  575. }
  576. void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
  577. FUNCC(put_pixels16)(dst, src, stride, 16);
  578. }
  579. void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
  580. FUNCC(avg_pixels16)(dst, src, stride, 16);
  581. }