/*
 * quarterpel DSP functions
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * quarterpel DSP functions
 */

  24. #include <stddef.h>
  25. #include <stdint.h>
  26. #include "config.h"
  27. #include "libavutil/attributes.h"
  28. #include "copy_block.h"
  29. #include "qpeldsp.h"
  30. #define BIT_DEPTH 8
  31. #include "hpel_template.c"
  32. #include "pel_template.c"
  33. #include "qpel_template.c"
  34. #define QPEL_MC(r, OPNAME, RND, OP) \
  35. static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, \
  36. int dstStride, int srcStride, \
  37. int h) \
  38. { \
  39. const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
  40. int i; \
  41. \
  42. for (i = 0; i < h; i++) { \
  43. OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \
  44. OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \
  45. OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \
  46. OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \
  47. OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \
  48. OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[8])); \
  49. OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[8]) * 3 - (src[3] + src[7])); \
  50. OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 + (src[5] + src[7]) * 3 - (src[4] + src[6])); \
  51. dst += dstStride; \
  52. src += srcStride; \
  53. } \
  54. } \
  55. \
  56. static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, const uint8_t *src, \
  57. int dstStride, int srcStride) \
  58. { \
  59. const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
  60. const int w = 8; \
  61. int i; \
  62. \
  63. for (i = 0; i < w; i++) { \
  64. const int src0 = src[0 * srcStride]; \
  65. const int src1 = src[1 * srcStride]; \
  66. const int src2 = src[2 * srcStride]; \
  67. const int src3 = src[3 * srcStride]; \
  68. const int src4 = src[4 * srcStride]; \
  69. const int src5 = src[5 * srcStride]; \
  70. const int src6 = src[6 * srcStride]; \
  71. const int src7 = src[7 * srcStride]; \
  72. const int src8 = src[8 * srcStride]; \
  73. OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \
  74. OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \
  75. OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \
  76. OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \
  77. OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \
  78. OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src8)); \
  79. OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src8) * 3 - (src3 + src7)); \
  80. OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src8) * 6 + (src5 + src7) * 3 - (src4 + src6)); \
  81. dst++; \
  82. src++; \
  83. } \
  84. } \
  85. \
  86. static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, \
  87. const uint8_t *src, \
  88. int dstStride, int srcStride, \
  89. int h) \
  90. { \
  91. const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
  92. int i; \
  93. \
  94. for (i = 0; i < h; i++) { \
  95. OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \
  96. OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \
  97. OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \
  98. OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \
  99. OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \
  100. OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[9])); \
  101. OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[9]) * 3 - (src[3] + src[10])); \
  102. OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[9]) * 6 + (src[5] + src[10]) * 3 - (src[4] + src[11])); \
  103. OP(dst[8], (src[8] + src[9]) * 20 - (src[7] + src[10]) * 6 + (src[6] + src[11]) * 3 - (src[5] + src[12])); \
  104. OP(dst[9], (src[9] + src[10]) * 20 - (src[8] + src[11]) * 6 + (src[7] + src[12]) * 3 - (src[6] + src[13])); \
  105. OP(dst[10], (src[10] + src[11]) * 20 - (src[9] + src[12]) * 6 + (src[8] + src[13]) * 3 - (src[7] + src[14])); \
  106. OP(dst[11], (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 + (src[9] + src[14]) * 3 - (src[8] + src[15])); \
  107. OP(dst[12], (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 + (src[10] + src[15]) * 3 - (src[9] + src[16])); \
  108. OP(dst[13], (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 + (src[11] + src[16]) * 3 - (src[10] + src[16])); \
  109. OP(dst[14], (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 + (src[12] + src[16]) * 3 - (src[11] + src[15])); \
  110. OP(dst[15], (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 + (src[13] + src[15]) * 3 - (src[12] + src[14])); \
  111. dst += dstStride; \
  112. src += srcStride; \
  113. } \
  114. } \
  115. \
  116. static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, \
  117. const uint8_t *src, \
  118. int dstStride, int srcStride) \
  119. { \
  120. const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
  121. const int w = 16; \
  122. int i; \
  123. \
  124. for (i = 0; i < w; i++) { \
  125. const int src0 = src[0 * srcStride]; \
  126. const int src1 = src[1 * srcStride]; \
  127. const int src2 = src[2 * srcStride]; \
  128. const int src3 = src[3 * srcStride]; \
  129. const int src4 = src[4 * srcStride]; \
  130. const int src5 = src[5 * srcStride]; \
  131. const int src6 = src[6 * srcStride]; \
  132. const int src7 = src[7 * srcStride]; \
  133. const int src8 = src[8 * srcStride]; \
  134. const int src9 = src[9 * srcStride]; \
  135. const int src10 = src[10 * srcStride]; \
  136. const int src11 = src[11 * srcStride]; \
  137. const int src12 = src[12 * srcStride]; \
  138. const int src13 = src[13 * srcStride]; \
  139. const int src14 = src[14 * srcStride]; \
  140. const int src15 = src[15 * srcStride]; \
  141. const int src16 = src[16 * srcStride]; \
  142. OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \
  143. OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \
  144. OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \
  145. OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \
  146. OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \
  147. OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src9)); \
  148. OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src9) * 3 - (src3 + src10)); \
  149. OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src9) * 6 + (src5 + src10) * 3 - (src4 + src11)); \
  150. OP(dst[8 * dstStride], (src8 + src9) * 20 - (src7 + src10) * 6 + (src6 + src11) * 3 - (src5 + src12)); \
  151. OP(dst[9 * dstStride], (src9 + src10) * 20 - (src8 + src11) * 6 + (src7 + src12) * 3 - (src6 + src13)); \
  152. OP(dst[10 * dstStride], (src10 + src11) * 20 - (src9 + src12) * 6 + (src8 + src13) * 3 - (src7 + src14)); \
  153. OP(dst[11 * dstStride], (src11 + src12) * 20 - (src10 + src13) * 6 + (src9 + src14) * 3 - (src8 + src15)); \
  154. OP(dst[12 * dstStride], (src12 + src13) * 20 - (src11 + src14) * 6 + (src10 + src15) * 3 - (src9 + src16)); \
  155. OP(dst[13 * dstStride], (src13 + src14) * 20 - (src12 + src15) * 6 + (src11 + src16) * 3 - (src10 + src16)); \
  156. OP(dst[14 * dstStride], (src14 + src15) * 20 - (src13 + src16) * 6 + (src12 + src16) * 3 - (src11 + src15)); \
  157. OP(dst[15 * dstStride], (src15 + src16) * 20 - (src14 + src16) * 6 + (src13 + src15) * 3 - (src12 + src14)); \
  158. dst++; \
  159. src++; \
  160. } \
  161. } \
  162. \
  163. static void OPNAME ## qpel8_mc10_c(uint8_t *dst, const uint8_t *src, \
  164. ptrdiff_t stride) \
  165. { \
  166. uint8_t half[64]; \
  167. \
  168. put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \
  169. OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8); \
  170. } \
  171. \
  172. static void OPNAME ## qpel8_mc20_c(uint8_t *dst, const uint8_t *src, \
  173. ptrdiff_t stride) \
  174. { \
  175. OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8); \
  176. } \
  177. \
  178. static void OPNAME ## qpel8_mc30_c(uint8_t *dst, const uint8_t *src, \
  179. ptrdiff_t stride) \
  180. { \
  181. uint8_t half[64]; \
  182. \
  183. put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \
  184. OPNAME ## pixels8_l2_8(dst, src + 1, half, stride, stride, 8, 8); \
  185. } \
  186. \
  187. static void OPNAME ## qpel8_mc01_c(uint8_t *dst, const uint8_t *src, \
  188. ptrdiff_t stride) \
  189. { \
  190. uint8_t full[16 * 9]; \
  191. uint8_t half[64]; \
  192. \
  193. copy_block9(full, src, 16, stride, 9); \
  194. put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \
  195. OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8); \
  196. } \
  197. \
  198. static void OPNAME ## qpel8_mc02_c(uint8_t *dst, const uint8_t *src, \
  199. ptrdiff_t stride) \
  200. { \
  201. uint8_t full[16 * 9]; \
  202. \
  203. copy_block9(full, src, 16, stride, 9); \
  204. OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16); \
  205. } \
  206. \
  207. static void OPNAME ## qpel8_mc03_c(uint8_t *dst, const uint8_t *src, \
  208. ptrdiff_t stride) \
  209. { \
  210. uint8_t full[16 * 9]; \
  211. uint8_t half[64]; \
  212. \
  213. copy_block9(full, src, 16, stride, 9); \
  214. put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \
  215. OPNAME ## pixels8_l2_8(dst, full + 16, half, stride, 16, 8, 8); \
  216. } \
  217. \
  218. void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, const uint8_t *src, \
  219. ptrdiff_t stride) \
  220. { \
  221. uint8_t full[16 * 9]; \
  222. uint8_t halfH[72]; \
  223. uint8_t halfV[64]; \
  224. uint8_t halfHV[64]; \
  225. \
  226. copy_block9(full, src, 16, stride, 9); \
  227. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
  228. put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \
  229. put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
  230. OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, \
  231. stride, 16, 8, 8, 8, 8); \
  232. } \
  233. \
  234. static void OPNAME ## qpel8_mc11_c(uint8_t *dst, const uint8_t *src, \
  235. ptrdiff_t stride) \
  236. { \
  237. uint8_t full[16 * 9]; \
  238. uint8_t halfH[72]; \
  239. uint8_t halfHV[64]; \
  240. \
  241. copy_block9(full, src, 16, stride, 9); \
  242. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
  243. put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \
  244. put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
  245. OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \
  246. } \
  247. \
  248. void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, const uint8_t *src, \
  249. ptrdiff_t stride) \
  250. { \
  251. uint8_t full[16 * 9]; \
  252. uint8_t halfH[72]; \
  253. uint8_t halfV[64]; \
  254. uint8_t halfHV[64]; \
  255. \
  256. copy_block9(full, src, 16, stride, 9); \
  257. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
  258. put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \
  259. put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
  260. OPNAME ## pixels8_l4_8(dst, full + 1, halfH, halfV, halfHV, \
  261. stride, 16, 8, 8, 8, 8); \
  262. } \
  263. \
  264. static void OPNAME ## qpel8_mc31_c(uint8_t *dst, const uint8_t *src, \
  265. ptrdiff_t stride) \
  266. { \
  267. uint8_t full[16 * 9]; \
  268. uint8_t halfH[72]; \
  269. uint8_t halfHV[64]; \
  270. \
  271. copy_block9(full, src, 16, stride, 9); \
  272. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
  273. put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \
  274. put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
  275. OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \
  276. } \
  277. \
  278. void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, const uint8_t *src, \
  279. ptrdiff_t stride) \
  280. { \
  281. uint8_t full[16 * 9]; \
  282. uint8_t halfH[72]; \
  283. uint8_t halfV[64]; \
  284. uint8_t halfHV[64]; \
  285. \
  286. copy_block9(full, src, 16, stride, 9); \
  287. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
  288. put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \
  289. put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
  290. OPNAME ## pixels8_l4_8(dst, full + 16, halfH + 8, halfV, halfHV, \
  291. stride, 16, 8, 8, 8, 8); \
  292. } \
  293. \
  294. static void OPNAME ## qpel8_mc13_c(uint8_t *dst, const uint8_t *src, \
  295. ptrdiff_t stride) \
  296. { \
  297. uint8_t full[16 * 9]; \
  298. uint8_t halfH[72]; \
  299. uint8_t halfHV[64]; \
  300. \
  301. copy_block9(full, src, 16, stride, 9); \
  302. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
  303. put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \
  304. put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
  305. OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \
  306. } \
  307. \
  308. void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, const uint8_t *src, \
  309. ptrdiff_t stride) \
  310. { \
  311. uint8_t full[16 * 9]; \
  312. uint8_t halfH[72]; \
  313. uint8_t halfV[64]; \
  314. uint8_t halfHV[64]; \
  315. \
  316. copy_block9(full, src, 16, stride, 9); \
  317. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
  318. put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \
  319. put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
  320. OPNAME ## pixels8_l4_8(dst, full + 17, halfH + 8, halfV, halfHV, \
  321. stride, 16, 8, 8, 8, 8); \
  322. } \
  323. \
  324. static void OPNAME ## qpel8_mc33_c(uint8_t *dst, const uint8_t *src, \
  325. ptrdiff_t stride) \
  326. { \
  327. uint8_t full[16 * 9]; \
  328. uint8_t halfH[72]; \
  329. uint8_t halfHV[64]; \
  330. \
  331. copy_block9(full, src, 16, stride, 9); \
  332. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
  333. put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \
  334. put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
  335. OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \
  336. } \
  337. \
  338. static void OPNAME ## qpel8_mc21_c(uint8_t *dst, const uint8_t *src, \
  339. ptrdiff_t stride) \
  340. { \
  341. uint8_t halfH[72]; \
  342. uint8_t halfHV[64]; \
  343. \
  344. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \
  345. put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
  346. OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \
  347. } \
  348. \
  349. static void OPNAME ## qpel8_mc23_c(uint8_t *dst, const uint8_t *src, \
  350. ptrdiff_t stride) \
  351. { \
  352. uint8_t halfH[72]; \
  353. uint8_t halfHV[64]; \
  354. \
  355. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \
  356. put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
  357. OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \
  358. } \
  359. \
  360. void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, const uint8_t *src, \
  361. ptrdiff_t stride) \
  362. { \
  363. uint8_t full[16 * 9]; \
  364. uint8_t halfH[72]; \
  365. uint8_t halfV[64]; \
  366. uint8_t halfHV[64]; \
  367. \
  368. copy_block9(full, src, 16, stride, 9); \
  369. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
  370. put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \
  371. put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
  372. OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \
  373. } \
  374. \
  375. static void OPNAME ## qpel8_mc12_c(uint8_t *dst, const uint8_t *src, \
  376. ptrdiff_t stride) \
  377. { \
  378. uint8_t full[16 * 9]; \
  379. uint8_t halfH[72]; \
  380. \
  381. copy_block9(full, src, 16, stride, 9); \
  382. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
  383. put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \
  384. OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \
  385. } \
  386. \
  387. void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, const uint8_t *src, \
  388. ptrdiff_t stride) \
  389. { \
  390. uint8_t full[16 * 9]; \
  391. uint8_t halfH[72]; \
  392. uint8_t halfV[64]; \
  393. uint8_t halfHV[64]; \
  394. \
  395. copy_block9(full, src, 16, stride, 9); \
  396. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
  397. put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \
  398. put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
  399. OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \
  400. } \
  401. \
  402. static void OPNAME ## qpel8_mc32_c(uint8_t *dst, const uint8_t *src, \
  403. ptrdiff_t stride) \
  404. { \
  405. uint8_t full[16 * 9]; \
  406. uint8_t halfH[72]; \
  407. \
  408. copy_block9(full, src, 16, stride, 9); \
  409. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
  410. put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \
  411. OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \
  412. } \
  413. \
  414. static void OPNAME ## qpel8_mc22_c(uint8_t *dst, const uint8_t *src, \
  415. ptrdiff_t stride) \
  416. { \
  417. uint8_t halfH[72]; \
  418. \
  419. put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \
  420. OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \
  421. } \
  422. \
  423. static void OPNAME ## qpel16_mc10_c(uint8_t *dst, const uint8_t *src, \
  424. ptrdiff_t stride) \
  425. { \
  426. uint8_t half[256]; \
  427. \
  428. put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \
  429. OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16); \
  430. } \
  431. \
  432. static void OPNAME ## qpel16_mc20_c(uint8_t *dst, const uint8_t *src, \
  433. ptrdiff_t stride) \
  434. { \
  435. OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16); \
  436. } \
  437. \
  438. static void OPNAME ## qpel16_mc30_c(uint8_t *dst, const uint8_t *src, \
  439. ptrdiff_t stride) \
  440. { \
  441. uint8_t half[256]; \
  442. \
  443. put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \
  444. OPNAME ## pixels16_l2_8(dst, src + 1, half, stride, stride, 16, 16); \
  445. } \
  446. \
  447. static void OPNAME ## qpel16_mc01_c(uint8_t *dst, const uint8_t *src, \
  448. ptrdiff_t stride) \
  449. { \
  450. uint8_t full[24 * 17]; \
  451. uint8_t half[256]; \
  452. \
  453. copy_block17(full, src, 24, stride, 17); \
  454. put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \
  455. OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16); \
  456. } \
  457. \
  458. static void OPNAME ## qpel16_mc02_c(uint8_t *dst, const uint8_t *src, \
  459. ptrdiff_t stride) \
  460. { \
  461. uint8_t full[24 * 17]; \
  462. \
  463. copy_block17(full, src, 24, stride, 17); \
  464. OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24); \
  465. } \
  466. \
  467. static void OPNAME ## qpel16_mc03_c(uint8_t *dst, const uint8_t *src, \
  468. ptrdiff_t stride) \
  469. { \
  470. uint8_t full[24 * 17]; \
  471. uint8_t half[256]; \
  472. \
  473. copy_block17(full, src, 24, stride, 17); \
  474. put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \
  475. OPNAME ## pixels16_l2_8(dst, full + 24, half, stride, 24, 16, 16); \
  476. } \
  477. \
  478. void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, const uint8_t *src, \
  479. ptrdiff_t stride) \
  480. { \
  481. uint8_t full[24 * 17]; \
  482. uint8_t halfH[272]; \
  483. uint8_t halfV[256]; \
  484. uint8_t halfHV[256]; \
  485. \
  486. copy_block17(full, src, 24, stride, 17); \
  487. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
  488. put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \
  489. put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
  490. OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, \
  491. stride, 24, 16, 16, 16, 16); \
  492. } \
  493. \
  494. static void OPNAME ## qpel16_mc11_c(uint8_t *dst, const uint8_t *src, \
  495. ptrdiff_t stride) \
  496. { \
  497. uint8_t full[24 * 17]; \
  498. uint8_t halfH[272]; \
  499. uint8_t halfHV[256]; \
  500. \
  501. copy_block17(full, src, 24, stride, 17); \
  502. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
  503. put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \
  504. put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
  505. OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \
  506. } \
  507. \
  508. void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, const uint8_t *src, \
  509. ptrdiff_t stride) \
  510. { \
  511. uint8_t full[24 * 17]; \
  512. uint8_t halfH[272]; \
  513. uint8_t halfV[256]; \
  514. uint8_t halfHV[256]; \
  515. \
  516. copy_block17(full, src, 24, stride, 17); \
  517. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
  518. put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \
  519. put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
  520. OPNAME ## pixels16_l4_8(dst, full + 1, halfH, halfV, halfHV, \
  521. stride, 24, 16, 16, 16, 16); \
  522. } \
  523. \
  524. static void OPNAME ## qpel16_mc31_c(uint8_t *dst, const uint8_t *src, \
  525. ptrdiff_t stride) \
  526. { \
  527. uint8_t full[24 * 17]; \
  528. uint8_t halfH[272]; \
  529. uint8_t halfHV[256]; \
  530. \
  531. copy_block17(full, src, 24, stride, 17); \
  532. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
  533. put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \
  534. put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
  535. OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \
  536. } \
  537. \
  538. void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, const uint8_t *src, \
  539. ptrdiff_t stride) \
  540. { \
  541. uint8_t full[24 * 17]; \
  542. uint8_t halfH[272]; \
  543. uint8_t halfV[256]; \
  544. uint8_t halfHV[256]; \
  545. \
  546. copy_block17(full, src, 24, stride, 17); \
  547. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
  548. put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \
  549. put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
  550. OPNAME ## pixels16_l4_8(dst, full + 24, halfH + 16, halfV, halfHV, \
  551. stride, 24, 16, 16, 16, 16); \
  552. } \
  553. \
  554. static void OPNAME ## qpel16_mc13_c(uint8_t *dst, const uint8_t *src, \
  555. ptrdiff_t stride) \
  556. { \
  557. uint8_t full[24 * 17]; \
  558. uint8_t halfH[272]; \
  559. uint8_t halfHV[256]; \
  560. \
  561. copy_block17(full, src, 24, stride, 17); \
  562. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
  563. put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \
  564. put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
  565. OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \
  566. } \
  567. \
  568. void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, const uint8_t *src, \
  569. ptrdiff_t stride) \
  570. { \
  571. uint8_t full[24 * 17]; \
  572. uint8_t halfH[272]; \
  573. uint8_t halfV[256]; \
  574. uint8_t halfHV[256]; \
  575. \
  576. copy_block17(full, src, 24, stride, 17); \
  577. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
  578. put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \
  579. put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
  580. OPNAME ## pixels16_l4_8(dst, full + 25, halfH + 16, halfV, halfHV, \
  581. stride, 24, 16, 16, 16, 16); \
  582. } \
  583. \
  584. static void OPNAME ## qpel16_mc33_c(uint8_t *dst, const uint8_t *src, \
  585. ptrdiff_t stride) \
  586. { \
  587. uint8_t full[24 * 17]; \
  588. uint8_t halfH[272]; \
  589. uint8_t halfHV[256]; \
  590. \
  591. copy_block17(full, src, 24, stride, 17); \
  592. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
  593. put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \
  594. put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
  595. OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \
  596. } \
  597. \
  598. static void OPNAME ## qpel16_mc21_c(uint8_t *dst, const uint8_t *src, \
  599. ptrdiff_t stride) \
  600. { \
  601. uint8_t halfH[272]; \
  602. uint8_t halfHV[256]; \
  603. \
  604. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \
  605. put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
  606. OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \
  607. } \
  608. \
  609. static void OPNAME ## qpel16_mc23_c(uint8_t *dst, const uint8_t *src, \
  610. ptrdiff_t stride) \
  611. { \
  612. uint8_t halfH[272]; \
  613. uint8_t halfHV[256]; \
  614. \
  615. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \
  616. put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
  617. OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \
  618. } \
  619. \
  620. void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, const uint8_t *src, \
  621. ptrdiff_t stride) \
  622. { \
  623. uint8_t full[24 * 17]; \
  624. uint8_t halfH[272]; \
  625. uint8_t halfV[256]; \
  626. uint8_t halfHV[256]; \
  627. \
  628. copy_block17(full, src, 24, stride, 17); \
  629. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
  630. put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \
  631. put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
  632. OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \
  633. } \
  634. \
  635. static void OPNAME ## qpel16_mc12_c(uint8_t *dst, const uint8_t *src, \
  636. ptrdiff_t stride) \
  637. { \
  638. uint8_t full[24 * 17]; \
  639. uint8_t halfH[272]; \
  640. \
  641. copy_block17(full, src, 24, stride, 17); \
  642. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
  643. put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \
  644. OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \
  645. } \
  646. \
  647. void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, const uint8_t *src, \
  648. ptrdiff_t stride) \
  649. { \
  650. uint8_t full[24 * 17]; \
  651. uint8_t halfH[272]; \
  652. uint8_t halfV[256]; \
  653. uint8_t halfHV[256]; \
  654. \
  655. copy_block17(full, src, 24, stride, 17); \
  656. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
  657. put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \
  658. put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
  659. OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \
  660. } \
  661. \
  662. static void OPNAME ## qpel16_mc32_c(uint8_t *dst, const uint8_t *src, \
  663. ptrdiff_t stride) \
  664. { \
  665. uint8_t full[24 * 17]; \
  666. uint8_t halfH[272]; \
  667. \
  668. copy_block17(full, src, 24, stride, 17); \
  669. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
  670. put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \
  671. OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \
  672. } \
  673. \
  674. static void OPNAME ## qpel16_mc22_c(uint8_t *dst, const uint8_t *src, \
  675. ptrdiff_t stride) \
  676. { \
  677. uint8_t halfH[272]; \
  678. \
  679. put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \
  680. OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \
  681. }
  682. #define op_avg(a, b) a = (((a) + cm[((b) + 16) >> 5] + 1) >> 1)
  683. #define op_put(a, b) a = cm[((b) + 16) >> 5]
  684. #define op_put_no_rnd(a, b) a = cm[((b) + 15) >> 5]
  685. QPEL_MC(0, put_, _, op_put)
  686. QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
  687. QPEL_MC(0, avg_, _, op_avg)
  688. #undef op_avg
  689. #undef op_put
  690. #undef op_put_no_rnd
  691. void ff_put_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
  692. {
  693. put_pixels8_8_c(dst, src, stride, 8);
  694. }
  695. void ff_avg_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
  696. {
  697. avg_pixels8_8_c(dst, src, stride, 8);
  698. }
  699. void ff_put_pixels16x16_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
  700. {
  701. put_pixels16_8_c(dst, src, stride, 16);
  702. }
  703. void ff_avg_pixels16x16_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
  704. {
  705. avg_pixels16_8_c(dst, src, stride, 16);
  706. }
  707. #define put_qpel8_mc00_c ff_put_pixels8x8_c
  708. #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
  709. #define put_qpel16_mc00_c ff_put_pixels16x16_c
  710. #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
  711. #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
  712. #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
  713. void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
  714. int dst_stride, int src_stride1, int src_stride2,
  715. int h)
  716. {
  717. put_pixels8_l2_8(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
  718. }
  719. av_cold void ff_qpeldsp_init(QpelDSPContext *c)
  720. {
  721. #define dspfunc(PFX, IDX, NUM) \
  722. c->PFX ## _pixels_tab[IDX][0] = PFX ## NUM ## _mc00_c; \
  723. c->PFX ## _pixels_tab[IDX][1] = PFX ## NUM ## _mc10_c; \
  724. c->PFX ## _pixels_tab[IDX][2] = PFX ## NUM ## _mc20_c; \
  725. c->PFX ## _pixels_tab[IDX][3] = PFX ## NUM ## _mc30_c; \
  726. c->PFX ## _pixels_tab[IDX][4] = PFX ## NUM ## _mc01_c; \
  727. c->PFX ## _pixels_tab[IDX][5] = PFX ## NUM ## _mc11_c; \
  728. c->PFX ## _pixels_tab[IDX][6] = PFX ## NUM ## _mc21_c; \
  729. c->PFX ## _pixels_tab[IDX][7] = PFX ## NUM ## _mc31_c; \
  730. c->PFX ## _pixels_tab[IDX][8] = PFX ## NUM ## _mc02_c; \
  731. c->PFX ## _pixels_tab[IDX][9] = PFX ## NUM ## _mc12_c; \
  732. c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
  733. c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
  734. c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
  735. c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
  736. c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
  737. c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
  738. dspfunc(put_qpel, 0, 16);
  739. dspfunc(put_qpel, 1, 8);
  740. dspfunc(put_no_rnd_qpel, 0, 16);
  741. dspfunc(put_no_rnd_qpel, 1, 8);
  742. dspfunc(avg_qpel, 0, 16);
  743. dspfunc(avg_qpel, 1, 8);
  744. if (ARCH_X86)
  745. ff_qpeldsp_init_x86(c);
  746. }