You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

765 lines
56KB

  1. /*
  2. * quarterpel DSP functions
  3. *
  4. * This file is part of Libav.
  5. *
  6. * Libav is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * Libav is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with Libav; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. /**
  21. * @file
  22. * quarterpel DSP functions
  23. */
  24. #include <stddef.h>
  25. #include <stdint.h>
  26. #include "config.h"
  27. #include "libavutil/attributes.h"
  28. #include "copy_block.h"
  29. #include "qpeldsp.h"
  30. #define BIT_DEPTH 8
  31. #include "hpel_template.c"
  32. #include "pel_template.c"
  33. #include "qpel_template.c"
  /*
   * QPEL_MC_LOWPASS(OPNAME, OP)
   *
   * Emits the four static lowpass filters used by the mcXY functions below:
   *   OPNAME mpeg4_qpel8_h_lowpass   - filters along each row, h rows
   *   OPNAME mpeg4_qpel8_v_lowpass   - filters down each column, 8 columns
   *   OPNAME mpeg4_qpel16_h_lowpass  - filters along each row, h rows
   *   OPNAME mpeg4_qpel16_v_lowpass  - filters down each column, 16 columns
   *
   * Every output is a sum of four symmetric input pairs weighted
   * 20, -6, 3 and -1. The raw sum is handed to OP(destination, sum); OP is
   * responsible for scaling, clipping through the local table `cm` and
   * storing. The 8-wide filters read input samples 0..8 and the 16-wide ones
   * samples 0..16; taps that would fall outside that range are replaced by
   * mirrored in-range samples, which is why the first and last few lines of
   * each filter do not follow the regular index pattern.
   *
   * src is only ever read, so it is const-qualified; callers passing a
   * plain uint8_t * are unaffected.
   */
  #define QPEL_MC_LOWPASS(OPNAME, OP) \
  static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, \
                                              int dstStride, int srcStride, \
                                              int h) \
  { \
      const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
      int i; \
  \
      for (i = 0; i < h; i++) { \
          OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \
          OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \
          OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \
          OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \
          OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \
          OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[8])); \
          OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[8]) * 3 - (src[3] + src[7])); \
          OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 + (src[5] + src[7]) * 3 - (src[4] + src[6])); \
          dst += dstStride; \
          src += srcStride; \
      } \
  } \
  \
  static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, const uint8_t *src, \
                                              int dstStride, int srcStride) \
  { \
      const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
      const int w = 8; \
      int i; \
  \
      for (i = 0; i < w; i++) { \
          /* Load the whole column before any store to dst. */ \
          const int src0 = src[0 * srcStride]; \
          const int src1 = src[1 * srcStride]; \
          const int src2 = src[2 * srcStride]; \
          const int src3 = src[3 * srcStride]; \
          const int src4 = src[4 * srcStride]; \
          const int src5 = src[5 * srcStride]; \
          const int src6 = src[6 * srcStride]; \
          const int src7 = src[7 * srcStride]; \
          const int src8 = src[8 * srcStride]; \
          OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \
          OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \
          OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \
          OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \
          OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \
          OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src8)); \
          OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src8) * 3 - (src3 + src7)); \
          OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src8) * 6 + (src5 + src7) * 3 - (src4 + src6)); \
          dst++; \
          src++; \
      } \
  } \
  \
  static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, const uint8_t *src, \
                                               int dstStride, int srcStride, \
                                               int h) \
  { \
      const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
      int i; \
  \
      for (i = 0; i < h; i++) { \
          OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \
          OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \
          OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \
          OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \
          OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \
          OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[9])); \
          OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[9]) * 3 - (src[3] + src[10])); \
          OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[9]) * 6 + (src[5] + src[10]) * 3 - (src[4] + src[11])); \
          OP(dst[8], (src[8] + src[9]) * 20 - (src[7] + src[10]) * 6 + (src[6] + src[11]) * 3 - (src[5] + src[12])); \
          OP(dst[9], (src[9] + src[10]) * 20 - (src[8] + src[11]) * 6 + (src[7] + src[12]) * 3 - (src[6] + src[13])); \
          OP(dst[10], (src[10] + src[11]) * 20 - (src[9] + src[12]) * 6 + (src[8] + src[13]) * 3 - (src[7] + src[14])); \
          OP(dst[11], (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 + (src[9] + src[14]) * 3 - (src[8] + src[15])); \
          OP(dst[12], (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 + (src[10] + src[15]) * 3 - (src[9] + src[16])); \
          OP(dst[13], (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 + (src[11] + src[16]) * 3 - (src[10] + src[16])); \
          OP(dst[14], (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 + (src[12] + src[16]) * 3 - (src[11] + src[15])); \
          OP(dst[15], (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 + (src[13] + src[15]) * 3 - (src[12] + src[14])); \
          dst += dstStride; \
          src += srcStride; \
      } \
  } \
  \
  static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, const uint8_t *src, \
                                               int dstStride, int srcStride) \
  { \
      const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
      const int w = 16; \
      int i; \
  \
      for (i = 0; i < w; i++) { \
          /* Load the whole column before any store to dst. */ \
          const int src0 = src[0 * srcStride]; \
          const int src1 = src[1 * srcStride]; \
          const int src2 = src[2 * srcStride]; \
          const int src3 = src[3 * srcStride]; \
          const int src4 = src[4 * srcStride]; \
          const int src5 = src[5 * srcStride]; \
          const int src6 = src[6 * srcStride]; \
          const int src7 = src[7 * srcStride]; \
          const int src8 = src[8 * srcStride]; \
          const int src9 = src[9 * srcStride]; \
          const int src10 = src[10 * srcStride]; \
          const int src11 = src[11 * srcStride]; \
          const int src12 = src[12 * srcStride]; \
          const int src13 = src[13 * srcStride]; \
          const int src14 = src[14 * srcStride]; \
          const int src15 = src[15 * srcStride]; \
          const int src16 = src[16 * srcStride]; \
          OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \
          OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \
          OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \
          OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \
          OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \
          OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src9)); \
          OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src9) * 3 - (src3 + src10)); \
          OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src9) * 6 + (src5 + src10) * 3 - (src4 + src11)); \
          OP(dst[8 * dstStride], (src8 + src9) * 20 - (src7 + src10) * 6 + (src6 + src11) * 3 - (src5 + src12)); \
          OP(dst[9 * dstStride], (src9 + src10) * 20 - (src8 + src11) * 6 + (src7 + src12) * 3 - (src6 + src13)); \
          OP(dst[10 * dstStride], (src10 + src11) * 20 - (src9 + src12) * 6 + (src8 + src13) * 3 - (src7 + src14)); \
          OP(dst[11 * dstStride], (src11 + src12) * 20 - (src10 + src13) * 6 + (src9 + src14) * 3 - (src8 + src15)); \
          OP(dst[12 * dstStride], (src12 + src13) * 20 - (src11 + src14) * 6 + (src10 + src15) * 3 - (src9 + src16)); \
          OP(dst[13 * dstStride], (src13 + src14) * 20 - (src12 + src15) * 6 + (src11 + src16) * 3 - (src10 + src16)); \
          OP(dst[14 * dstStride], (src14 + src15) * 20 - (src13 + src16) * 6 + (src12 + src16) * 3 - (src11 + src15)); \
          OP(dst[15 * dstStride], (src15 + src16) * 20 - (src14 + src16) * 6 + (src13 + src15) * 3 - (src12 + src14)); \
          dst++; \
          src++; \
      } \
  }

  /*
   * QPEL_MC_BLOCK(OPNAME, RND, N, N1, FS)
   *
   * Emits the mcXY motion-compensation functions for an N x N block.
   *   N   block width/height (8 or 16)
   *   N1  N + 1, the number of source rows/columns the lowpass filters read
   *       (9 or 17); also selects copy_block ## N1
   *   FS  line stride of the on-stack copy `full` (16 or 24)
   *
   * In mcXY, X describes the horizontal step and Y the vertical one:
   *   0  no filtering in that direction
   *   1  average of the unfiltered samples and the lowpass result
   *   2  lowpass result only
   *   3  as 1, but averaging with the samples one position further on
   *
   * Intermediate results are always produced with the put ## RND variant;
   * only the final store uses OPNAME. The non-static ff_..._old_c variants
   * build the corner/edge positions from separately filtered halfH, halfV
   * and halfHV planes instead of filtering the already-averaged halfH.
   *
   * The two size families were previously written out twice; they differ
   * only in the three constants above, so they are generated from this one
   * template to keep them from drifting apart.
   */
  #define QPEL_MC_BLOCK(OPNAME, RND, N, N1, FS) \
  static void OPNAME ## qpel ## N ## _mc10_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t half[N * N]; \
  \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(half, src, N, stride, N); \
      OPNAME ## pixels ## N ## _l2_8(dst, src, half, stride, stride, N, N); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc20_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      OPNAME ## mpeg4_qpel ## N ## _h_lowpass(dst, src, stride, stride, N); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc30_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t half[N * N]; \
  \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(half, src, N, stride, N); \
      OPNAME ## pixels ## N ## _l2_8(dst, src + 1, half, stride, stride, N, N); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc01_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t half[N * N]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(half, full, N, FS); \
      OPNAME ## pixels ## N ## _l2_8(dst, full, half, stride, FS, N, N); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc02_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      OPNAME ## mpeg4_qpel ## N ## _v_lowpass(dst, full, stride, FS); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc03_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t half[N * N]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(half, full, N, FS); \
      OPNAME ## pixels ## N ## _l2_8(dst, full + FS, half, stride, FS, N, N); \
  } \
  \
  void ff_ ## OPNAME ## qpel ## N ## _mc11_old_c(uint8_t *dst, uint8_t *src, \
                                                 ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t halfH[N * N1]; \
      uint8_t halfV[N * N]; \
      uint8_t halfHV[N * N]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, full, N, FS, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfV, full, N, FS); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfHV, halfH, N, N); \
      OPNAME ## pixels ## N ## _l4_8(dst, full, halfH, halfV, halfHV, \
                                     stride, FS, N, N, N, N); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc11_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t halfH[N * N1]; \
      uint8_t halfHV[N * N]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, full, N, FS, N1); \
      put ## RND ## pixels ## N ## _l2_8(halfH, halfH, full, N, N, FS, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfHV, halfH, N, N); \
      OPNAME ## pixels ## N ## _l2_8(dst, halfH, halfHV, stride, N, N, N); \
  } \
  \
  void ff_ ## OPNAME ## qpel ## N ## _mc31_old_c(uint8_t *dst, uint8_t *src, \
                                                 ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t halfH[N * N1]; \
      uint8_t halfV[N * N]; \
      uint8_t halfHV[N * N]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, full, N, FS, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfV, full + 1, N, FS); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfHV, halfH, N, N); \
      OPNAME ## pixels ## N ## _l4_8(dst, full + 1, halfH, halfV, halfHV, \
                                     stride, FS, N, N, N, N); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc31_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t halfH[N * N1]; \
      uint8_t halfHV[N * N]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, full, N, FS, N1); \
      put ## RND ## pixels ## N ## _l2_8(halfH, halfH, full + 1, N, N, FS, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfHV, halfH, N, N); \
      OPNAME ## pixels ## N ## _l2_8(dst, halfH, halfHV, stride, N, N, N); \
  } \
  \
  void ff_ ## OPNAME ## qpel ## N ## _mc13_old_c(uint8_t *dst, uint8_t *src, \
                                                 ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t halfH[N * N1]; \
      uint8_t halfV[N * N]; \
      uint8_t halfHV[N * N]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, full, N, FS, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfV, full, N, FS); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfHV, halfH, N, N); \
      OPNAME ## pixels ## N ## _l4_8(dst, full + FS, halfH + N, halfV, halfHV, \
                                     stride, FS, N, N, N, N); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc13_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t halfH[N * N1]; \
      uint8_t halfHV[N * N]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, full, N, FS, N1); \
      put ## RND ## pixels ## N ## _l2_8(halfH, halfH, full, N, N, FS, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfHV, halfH, N, N); \
      OPNAME ## pixels ## N ## _l2_8(dst, halfH + N, halfHV, stride, N, N, N); \
  } \
  \
  void ff_ ## OPNAME ## qpel ## N ## _mc33_old_c(uint8_t *dst, uint8_t *src, \
                                                 ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t halfH[N * N1]; \
      uint8_t halfV[N * N]; \
      uint8_t halfHV[N * N]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, full, N, FS, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfV, full + 1, N, FS); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfHV, halfH, N, N); \
      OPNAME ## pixels ## N ## _l4_8(dst, full + FS + 1, halfH + N, halfV, halfHV, \
                                     stride, FS, N, N, N, N); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc33_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t halfH[N * N1]; \
      uint8_t halfHV[N * N]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, full, N, FS, N1); \
      put ## RND ## pixels ## N ## _l2_8(halfH, halfH, full + 1, N, N, FS, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfHV, halfH, N, N); \
      OPNAME ## pixels ## N ## _l2_8(dst, halfH + N, halfHV, stride, N, N, N); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc21_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t halfH[N * N1]; \
      uint8_t halfHV[N * N]; \
  \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, src, N, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfHV, halfH, N, N); \
      OPNAME ## pixels ## N ## _l2_8(dst, halfH, halfHV, stride, N, N, N); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc23_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t halfH[N * N1]; \
      uint8_t halfHV[N * N]; \
  \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, src, N, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfHV, halfH, N, N); \
      OPNAME ## pixels ## N ## _l2_8(dst, halfH + N, halfHV, stride, N, N, N); \
  } \
  \
  void ff_ ## OPNAME ## qpel ## N ## _mc12_old_c(uint8_t *dst, uint8_t *src, \
                                                 ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t halfH[N * N1]; \
      uint8_t halfV[N * N]; \
      uint8_t halfHV[N * N]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, full, N, FS, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfV, full, N, FS); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfHV, halfH, N, N); \
      OPNAME ## pixels ## N ## _l2_8(dst, halfV, halfHV, stride, N, N, N); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc12_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t halfH[N * N1]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, full, N, FS, N1); \
      put ## RND ## pixels ## N ## _l2_8(halfH, halfH, full, N, N, FS, N1); \
      OPNAME ## mpeg4_qpel ## N ## _v_lowpass(dst, halfH, stride, N); \
  } \
  \
  void ff_ ## OPNAME ## qpel ## N ## _mc32_old_c(uint8_t *dst, uint8_t *src, \
                                                 ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t halfH[N * N1]; \
      uint8_t halfV[N * N]; \
      uint8_t halfHV[N * N]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, full, N, FS, N1); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfV, full + 1, N, FS); \
      put ## RND ## mpeg4_qpel ## N ## _v_lowpass(halfHV, halfH, N, N); \
      OPNAME ## pixels ## N ## _l2_8(dst, halfV, halfHV, stride, N, N, N); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc32_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t full[FS * N1]; \
      uint8_t halfH[N * N1]; \
  \
      copy_block ## N1(full, src, FS, stride, N1); \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, full, N, FS, N1); \
      put ## RND ## pixels ## N ## _l2_8(halfH, halfH, full + 1, N, N, FS, N1); \
      OPNAME ## mpeg4_qpel ## N ## _v_lowpass(dst, halfH, stride, N); \
  } \
  \
  static void OPNAME ## qpel ## N ## _mc22_c(uint8_t *dst, uint8_t *src, \
                                             ptrdiff_t stride) \
  { \
      uint8_t halfH[N * N1]; \
  \
      put ## RND ## mpeg4_qpel ## N ## _h_lowpass(halfH, src, N, stride, N1); \
      OPNAME ## mpeg4_qpel ## N ## _v_lowpass(dst, halfH, stride, N); \
  }

  /*
   * QPEL_MC(r, OPNAME, RND, OP)
   *
   * Instantiates the complete quarter-pel function set for one operation:
   * the four lowpass filters followed by the 8x8 and the 16x16 mcXY families.
   *   r       unused; kept so existing invocations stay valid
   *   OPNAME  name prefix of the generated functions (e.g. put_, avg_)
   *   RND     infix selecting which put variant produces intermediates
   *           (_ or _no_rnd_)
   *   OP      store macro taking (destination lvalue, raw filter sum)
   *
   * OPNAME, RND and OP are forwarded as plain macro arguments, so they must
   * not themselves be defined as object-like macros at the point of use.
   */
  #define QPEL_MC(r, OPNAME, RND, OP) \
  QPEL_MC_LOWPASS(OPNAME, OP) \
  QPEL_MC_BLOCK(OPNAME, RND, 8, 9, 16) \
  QPEL_MC_BLOCK(OPNAME, RND, 16, 17, 24)
  680. #define op_avg(a, b) a = (((a) + cm[((b) + 16) >> 5] + 1) >> 1)
  681. #define op_put(a, b) a = cm[((b) + 16) >> 5]
  682. #define op_put_no_rnd(a, b) a = cm[((b) + 15) >> 5]
  683. QPEL_MC(0, put_, _, op_put)
  684. QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
  685. QPEL_MC(0, avg_, _, op_avg)
  686. #undef op_avg
  687. #undef op_put
  688. #undef op_put_no_rnd
  /**
   * Exported 8-row entry point for the 8-wide "put" primitive.
   *
   * Forwards dst, src and stride unchanged to put_pixels8_8_c (generated by
   * the included templates) with a fixed height of 8.
   */
  void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
  {
      const int rows = 8;

      put_pixels8_8_c(dst, src, stride, rows);
  }
  /**
   * Exported 8-row entry point for the 8-wide "avg" primitive.
   *
   * Forwards dst, src and stride unchanged to avg_pixels8_8_c (generated by
   * the included templates) with a fixed height of 8.
   */
  void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
  {
      const int rows = 8;

      avg_pixels8_8_c(dst, src, stride, rows);
  }
  /**
   * Exported 16-row entry point for the 16-wide "put" primitive.
   *
   * Forwards dst, src and stride unchanged to put_pixels16_8_c (generated by
   * the included templates) with a fixed height of 16.
   */
  void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
  {
      const int rows = 16;

      put_pixels16_8_c(dst, src, stride, rows);
  }
  /**
   * Exported 16-row entry point for the 16-wide "avg" primitive.
   *
   * Forwards dst, src and stride unchanged to avg_pixels16_8_c (generated by
   * the included templates) with a fixed height of 16.
   */
  void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
  {
      const int rows = 16;

      avg_pixels16_8_c(dst, src, stride, rows);
  }
  /*
   * The mc00 slot has no generated function of its own; map the names that
   * dspfunc() in ff_qpeldsp_init() will paste together onto the exported
   * whole-block helpers. The no_rnd variants share the plain put helpers.
   */
  #define put_qpel8_mc00_c         ff_put_pixels8x8_c
  #define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
  #define avg_qpel8_mc00_c         ff_avg_pixels8x8_c
  #define put_qpel16_mc00_c        ff_put_pixels16x16_c
  #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
  #define avg_qpel16_mc00_c        ff_avg_pixels16x16_c
  /**
   * Exported wrapper around the template-generated put_pixels8_l2_8.
   *
   * All arguments are passed through unchanged: one destination with its
   * stride, two sources with their own strides, and the row count h.
   */
  void ff_put_pixels8_l2_8(uint8_t *dst,
                           const uint8_t *src1, const uint8_t *src2,
                           int dst_stride,
                           int src_stride1, int src_stride2,
                           int h)
  {
      put_pixels8_l2_8(dst, src1, src2,
                       dst_stride, src_stride1, src_stride2,
                       h);
  }
  717. av_cold void ff_qpeldsp_init(QpelDSPContext *c)
  718. {
  719. #define dspfunc(PFX, IDX, NUM) \
  720. c->PFX ## _pixels_tab[IDX][0] = PFX ## NUM ## _mc00_c; \
  721. c->PFX ## _pixels_tab[IDX][1] = PFX ## NUM ## _mc10_c; \
  722. c->PFX ## _pixels_tab[IDX][2] = PFX ## NUM ## _mc20_c; \
  723. c->PFX ## _pixels_tab[IDX][3] = PFX ## NUM ## _mc30_c; \
  724. c->PFX ## _pixels_tab[IDX][4] = PFX ## NUM ## _mc01_c; \
  725. c->PFX ## _pixels_tab[IDX][5] = PFX ## NUM ## _mc11_c; \
  726. c->PFX ## _pixels_tab[IDX][6] = PFX ## NUM ## _mc21_c; \
  727. c->PFX ## _pixels_tab[IDX][7] = PFX ## NUM ## _mc31_c; \
  728. c->PFX ## _pixels_tab[IDX][8] = PFX ## NUM ## _mc02_c; \
  729. c->PFX ## _pixels_tab[IDX][9] = PFX ## NUM ## _mc12_c; \
  730. c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
  731. c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
  732. c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
  733. c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
  734. c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
  735. c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
  736. dspfunc(put_qpel, 0, 16);
  737. dspfunc(put_qpel, 1, 8);
  738. dspfunc(put_no_rnd_qpel, 0, 16);
  739. dspfunc(put_no_rnd_qpel, 1, 8);
  740. dspfunc(avg_qpel, 0, 16);
  741. dspfunc(avg_qpel, 1, 8);
  742. if (ARCH_X86)
  743. ff_qpeldsp_init_x86(c);
  744. }