You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

255 lines
9.5KB

/*
 * Half-pel DSP functions.
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file
 * Half-pel DSP functions.
 */
  28. #include "bit_depth_template.c"
  29. #include "hpel_template.c"
  30. #define PIXOP2(OPNAME, OP) \
  31. static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
  32. int src_stride1, int src_stride2, int h){\
  33. int i;\
  34. for(i=0; i<h; i++){\
  35. pixel4 a,b;\
  36. a= AV_RN4P(&src1[i*src_stride1 ]);\
  37. b= AV_RN4P(&src2[i*src_stride2 ]);\
  38. OP(*((pixel4*)&dst[i*dst_stride ]), no_rnd_avg_pixel4(a, b));\
  39. a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
  40. b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
  41. OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
  42. }\
  43. }\
  44. \
  45. static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  46. FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
  47. }\
  48. \
  49. static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  50. FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
  51. }\
  52. \
  53. static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  54. FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
  55. }\
  56. \
  57. static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  58. FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
  59. }\
  60. \
  61. static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  62. FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
  63. }\
  64. \
  65. static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  66. FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
  67. }\
  68. \
  69. static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  70. FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
  71. }\
  72. \
  73. static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
  74. FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
  75. }\
  76. \
  77. static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, ptrdiff_t line_size, int h)\
  78. {\
  79. int i, a0, b0, a1, b1;\
  80. pixel *block = (pixel*)_block;\
  81. const pixel *pixels = (const pixel*)_pixels;\
  82. line_size /= sizeof(pixel);\
  83. a0= pixels[0];\
  84. b0= pixels[1] + 2;\
  85. a0 += b0;\
  86. b0 += pixels[2];\
  87. \
  88. pixels+=line_size;\
  89. for(i=0; i<h; i+=2){\
  90. a1= pixels[0];\
  91. b1= pixels[1];\
  92. a1 += b1;\
  93. b1 += pixels[2];\
  94. \
  95. block[0]= (a1+a0)>>2; /* FIXME non put */\
  96. block[1]= (b1+b0)>>2;\
  97. \
  98. pixels+=line_size;\
  99. block +=line_size;\
  100. \
  101. a0= pixels[0];\
  102. b0= pixels[1] + 2;\
  103. a0 += b0;\
  104. b0 += pixels[2];\
  105. \
  106. block[0]= (a1+a0)>>2;\
  107. block[1]= (b1+b0)>>2;\
  108. pixels+=line_size;\
  109. block +=line_size;\
  110. }\
  111. }\
  112. \
  113. static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)\
  114. {\
  115. /* FIXME HIGH BIT DEPTH */\
  116. int i;\
  117. const uint32_t a= AV_RN32(pixels );\
  118. const uint32_t b= AV_RN32(pixels+1);\
  119. uint32_t l0= (a&0x03030303UL)\
  120. + (b&0x03030303UL)\
  121. + 0x02020202UL;\
  122. uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
  123. + ((b&0xFCFCFCFCUL)>>2);\
  124. uint32_t l1,h1;\
  125. \
  126. pixels+=line_size;\
  127. for(i=0; i<h; i+=2){\
  128. uint32_t a= AV_RN32(pixels );\
  129. uint32_t b= AV_RN32(pixels+1);\
  130. l1= (a&0x03030303UL)\
  131. + (b&0x03030303UL);\
  132. h1= ((a&0xFCFCFCFCUL)>>2)\
  133. + ((b&0xFCFCFCFCUL)>>2);\
  134. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  135. pixels+=line_size;\
  136. block +=line_size;\
  137. a= AV_RN32(pixels );\
  138. b= AV_RN32(pixels+1);\
  139. l0= (a&0x03030303UL)\
  140. + (b&0x03030303UL)\
  141. + 0x02020202UL;\
  142. h0= ((a&0xFCFCFCFCUL)>>2)\
  143. + ((b&0xFCFCFCFCUL)>>2);\
  144. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  145. pixels+=line_size;\
  146. block +=line_size;\
  147. }\
  148. }\
  149. \
  150. static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)\
  151. {\
  152. /* FIXME HIGH BIT DEPTH */\
  153. int j;\
  154. for(j=0; j<2; j++){\
  155. int i;\
  156. const uint32_t a= AV_RN32(pixels );\
  157. const uint32_t b= AV_RN32(pixels+1);\
  158. uint32_t l0= (a&0x03030303UL)\
  159. + (b&0x03030303UL)\
  160. + 0x02020202UL;\
  161. uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
  162. + ((b&0xFCFCFCFCUL)>>2);\
  163. uint32_t l1,h1;\
  164. \
  165. pixels+=line_size;\
  166. for(i=0; i<h; i+=2){\
  167. uint32_t a= AV_RN32(pixels );\
  168. uint32_t b= AV_RN32(pixels+1);\
  169. l1= (a&0x03030303UL)\
  170. + (b&0x03030303UL);\
  171. h1= ((a&0xFCFCFCFCUL)>>2)\
  172. + ((b&0xFCFCFCFCUL)>>2);\
  173. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  174. pixels+=line_size;\
  175. block +=line_size;\
  176. a= AV_RN32(pixels );\
  177. b= AV_RN32(pixels+1);\
  178. l0= (a&0x03030303UL)\
  179. + (b&0x03030303UL)\
  180. + 0x02020202UL;\
  181. h0= ((a&0xFCFCFCFCUL)>>2)\
  182. + ((b&0xFCFCFCFCUL)>>2);\
  183. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  184. pixels+=line_size;\
  185. block +=line_size;\
  186. }\
  187. pixels+=4-line_size*(h+1);\
  188. block +=4-line_size*h;\
  189. }\
  190. }\
  191. \
  192. static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)\
  193. {\
  194. /* FIXME HIGH BIT DEPTH */\
  195. int j;\
  196. for(j=0; j<2; j++){\
  197. int i;\
  198. const uint32_t a= AV_RN32(pixels );\
  199. const uint32_t b= AV_RN32(pixels+1);\
  200. uint32_t l0= (a&0x03030303UL)\
  201. + (b&0x03030303UL)\
  202. + 0x01010101UL;\
  203. uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
  204. + ((b&0xFCFCFCFCUL)>>2);\
  205. uint32_t l1,h1;\
  206. \
  207. pixels+=line_size;\
  208. for(i=0; i<h; i+=2){\
  209. uint32_t a= AV_RN32(pixels );\
  210. uint32_t b= AV_RN32(pixels+1);\
  211. l1= (a&0x03030303UL)\
  212. + (b&0x03030303UL);\
  213. h1= ((a&0xFCFCFCFCUL)>>2)\
  214. + ((b&0xFCFCFCFCUL)>>2);\
  215. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  216. pixels+=line_size;\
  217. block +=line_size;\
  218. a= AV_RN32(pixels );\
  219. b= AV_RN32(pixels+1);\
  220. l0= (a&0x03030303UL)\
  221. + (b&0x03030303UL)\
  222. + 0x01010101UL;\
  223. h0= ((a&0xFCFCFCFCUL)>>2)\
  224. + ((b&0xFCFCFCFCUL)>>2);\
  225. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  226. pixels+=line_size;\
  227. block +=line_size;\
  228. }\
  229. pixels+=4-line_size*(h+1);\
  230. block +=4-line_size*h;\
  231. }\
  232. }\
  233. \
  234. CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
  235. CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
  236. CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
  237. av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
  238. CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
  239. CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
  240. CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\
  241. #define op_avg(a, b) a = rnd_avg_pixel4(a, b)
  242. #define op_put(a, b) a = b
  243. #if BIT_DEPTH == 8
  244. #define put_no_rnd_pixels8_8_c put_pixels8_8_c
  245. PIXOP2(avg, op_avg)
  246. PIXOP2(put, op_put)
  247. #endif
  248. #undef op_avg
  249. #undef op_put