You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

543 lines
36KB

  1. /*
  2. * quarterpel DSP functions
  3. *
  4. * This file is part of Libav.
  5. *
  6. * Libav is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * Libav is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with Libav; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include <stddef.h>
  21. #include <stdint.h>
  22. #include "config.h"
  23. #include "libavutil/attributes.h"
  24. #include "libavutil/cpu.h"
  25. #include "libavutil/x86/cpu.h"
  26. #include "libavcodec/pixels.h"
  27. #include "libavcodec/qpeldsp.h"
  28. #include "fpel.h"
/*
 * Two-source ("l2") pixel routines, MMXEXT flavour, for 8- and 16-wide
 * blocks in put, put_no_rnd and avg variants.
 *
 * They are only declared here; the definitions are not in this file
 * (presumably hand-written assembly, given the HAVE_X86ASM guard around
 * their users -- TODO confirm).
 *
 * Each takes a destination with its own stride, a first source with its own
 * stride, a second source without a stride parameter, and a row count h.
 * Every call in this file passes a tightly packed scratch buffer (8 or 16
 * bytes per row, matching the block width) as src2.
 *
 * NOTE(review): the stride parameters are int, while the callers in this
 * file hold their stride as ptrdiff_t, so the value is implicitly converted
 * at each call.
 */
void ff_put_pixels8_l2_mmxext(uint8_t *dst,
                              const uint8_t *src1, const uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst,
                                     const uint8_t *src1, const uint8_t *src2,
                                     int dstStride, int src1Stride, int h);
void ff_avg_pixels8_l2_mmxext(uint8_t *dst,
                              const uint8_t *src1, const uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_put_pixels16_l2_mmxext(uint8_t *dst,
                               const uint8_t *src1, const uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_avg_pixels16_l2_mmxext(uint8_t *dst,
                               const uint8_t *src1, const uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst,
                                      const uint8_t *src1, const uint8_t *src2,
                                      int dstStride, int src1Stride, int h);
/*
 * Horizontal low-pass ("h_lowpass") routines for 16- and 8-wide blocks in
 * put, avg and put_no_rnd variants. Declared here, defined elsewhere.
 *
 * Unlike the vertical routines, these take an explicit row count h; the
 * callers in this file pass either the block height or the block height
 * plus one (9 or 17) when the result is fed to a vertical pass afterwards.
 */
void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride, int h);
void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,
                                                 const uint8_t *src,
                                                 int dstStride, int srcStride,
                                                 int h);
void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride, int h);
void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,
                                                const uint8_t *src,
                                                int dstStride, int srcStride,
                                                int h);
/*
 * Vertical low-pass ("v_lowpass") routines for 16- and 8-wide blocks in
 * put, avg and put_no_rnd variants. Declared here, defined elsewhere.
 *
 * There is no row-count parameter: the height is implied by the routine.
 * When the input is an intermediate buffer, the callers in this file
 * prepare one row more than the block height (9 or 17 rows) --
 * presumably because the filter reads past the last output row; confirm
 * against the implementation.
 */
void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride);
void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst,
                                                 const uint8_t *src,
                                                 int dstStride, int srcStride);
void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride);
void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
                                                const uint8_t *src,
                                                int dstStride, int srcStride);
/*
 * The "no_rnd" plain-copy names are aliases of the ordinary put routines.
 * QPEL_OP(put_no_rnd_, ...) pastes these names together in its mc00
 * functions, so they must resolve to something. Presumably a plain copy
 * involves no averaging and therefore no rounding to disable -- TODO confirm.
 */
#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext
#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext
#if HAVE_X86ASM
/*
 * Provide the 16-wide avg/put pixel functions that the qpel16_mc00
 * functions below refer to. CALL_2X_PIXELS is not defined in this file;
 * judging by its arguments it presumably builds each 16-wide function from
 * two calls of the 8-wide one, 8 bytes apart -- TODO confirm against its
 * definition.
 */
CALL_2X_PIXELS(ff_avg_pixels16_mmxext, ff_avg_pixels8_mmxext, 8)
CALL_2X_PIXELS(ff_put_pixels16_mmxext, ff_put_pixels8_mmxext, 8)
/*
 * QPEL_OP(OPNAME, RND, MMX) expands to 32 static functions named
 * <OPNAME>qpel8_mcXY_<MMX> and <OPNAME>qpel16_mcXY_<MMX> for X, Y in 0..3,
 * each with the signature (uint8_t *dst, const uint8_t *src,
 * ptrdiff_t stride).
 *
 *   OPNAME  prefix pasted into the name of the routine that writes to dst
 *           (the final step of every function).
 *   RND     text pasted between "ff_put" and the name of every routine that
 *           writes to a scratch buffer (intermediate steps).
 *   MMX     instruction-set suffix pasted onto every callee and onto the
 *           generated function names.
 *
 * Naming: functions with Y == 0 use only the horizontal low-pass routine,
 * functions with X == 0 only the vertical one, and mc00 neither. X and Y
 * are presumably the horizontal and vertical offsets in quarter samples --
 * confirm against the codec documentation.
 *
 * Scratch buffers are declared as uint64_t arrays and accessed through
 * uint8_t pointers; rows inside them are packed with a stride equal to the
 * block width (8 or 16 bytes).
 *
 * Comments inside the macro body are written as single-line block comments
 * followed by a line continuation, so that they stay part of the macro.
 */
#define QPEL_OP(OPNAME, RND, MMX) \
/* ---- 8-wide blocks ---- */ \
/* mc00: no filtering, forwards to the plain pixels routine with h = 8. */ \
static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
} \
\
/* mc10: horizontal low-pass into scratch, then l2 of src and scratch. */ \
static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    /* 8 rows x 8 bytes = 64 bytes = 8 uint64_t. */ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
            stride, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
            stride, stride, 8); \
} \
\
/* mc20: horizontal low-pass written straight to dst. */ \
static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
            stride, 8); \
} \
\
/* mc30: as mc10, but the l2 step uses src + 1 (one byte to the right). */ \
static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
            stride, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \
            stride, 8); \
} \
\
/* mc01: vertical low-pass into scratch, then l2 of src and scratch. */ \
static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
            8, stride); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
            stride, stride, 8); \
} \
\
/* mc02: vertical low-pass written straight to dst. */ \
static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
            stride, stride); \
} \
\
/* mc03: as mc01, but the l2 step uses src + stride (one row down). */ \
static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
            8, stride); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
            stride, 8); \
} \
\
/* mc11/mc31/mc13/mc33: 9-row horizontal pass into halfH, l2 of halfH */ \
/* with src (or src + 1), vertical pass of halfH into halfHV, then l2 */ \
/* of halfH (or halfH + 8, i.e. one row down) and halfHV into dst. */ \
static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    /* halfHV: bytes 0..63 (8 rows x 8); halfH: bytes 64..135 (9 rows x 8). */ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
            stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
            stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
            stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
            stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
            stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
            stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
            stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
            stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
            stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
            stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
            stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
            stride, 8, 8); \
} \
\
/* mc21/mc23: as mc11/mc13 without the l2 step between the two passes. */ \
static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
            stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
            stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
            stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
            stride, 8, 8); \
} \
\
/* mc12/mc32: 9-row horizontal pass, l2 with src (or src + 1) in place, */ \
/* then the vertical pass writes straight to dst. */ \
static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    /* NOTE(review): only halfH (9 rows x 8 bytes = 9 elements) is used */ \
    /* here, yet the array has 8 + 9 elements; mc22 declares just 9. */ \
    /* Confirm the callees never write past 72 bytes before shrinking. */ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
            stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
            8, stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
            stride, 8); \
} \
\
static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
            stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
            stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
            stride, 8); \
} \
\
/* mc22: 9-row horizontal pass into scratch, vertical pass into dst. */ \
static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[9]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
            stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
            stride, 8); \
} \
\
/* ---- 16-wide blocks: same structure with 16-byte rows ---- */ \
static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    /* 16 rows x 16 bytes = 256 bytes = 32 uint64_t. */ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
            stride, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
            stride, 16); \
} \
\
static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
            stride, stride, 16);\
} \
\
static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t*) temp; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
            stride, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \
            stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
            stride); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
            stride, 16); \
} \
\
static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
            stride, stride); \
} \
\
static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
            stride); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \
            stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    /* halfHV: bytes 0..255 (16 rows x 16); */ \
    /* halfH: bytes 256..527 (17 rows x 16). */ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
            stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
            stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
            16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
            stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
            stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
            stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
            16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
            stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
            stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
            stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
            16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
            stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
            stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
            stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
            16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
            stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
            stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
            16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
            stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
            stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
            16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
            stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    /* 17 rows x 16 bytes = 272 bytes = 34 uint64_t. */ \
    uint64_t half[17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
            stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
            stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
            stride, 16); \
} \
\
static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
            stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
            stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
            stride, 16); \
} \
\
static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \
        const uint8_t *src, \
        ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
            stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
            stride, 16); \
}
/*
 * Instantiate the three function families for MMXEXT. The second argument
 * selects the intermediate routines: "_" yields ff_put_..., "_no_rnd_"
 * yields ff_put_no_rnd_... . Note that the avg_ family uses the ordinary
 * put routines for its intermediate steps.
 */
QPEL_OP(put_, _, mmxext)
QPEL_OP(avg_, _, mmxext)
QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
#endif /* HAVE_X86ASM */
/*
 * Fill one 16-entry row of a function table in the context. The macro
 * refers to a variable named `c` that must be in scope where it is expanded.
 *
 *   PFX     table prefix; the table written is c->PFX_pixels_tab
 *   IDX     row of that table to fill
 *   SIZE    block size pasted into the function names (16 or 8 in this file)
 *   CPU     instruction-set suffix pasted onto the function names
 *   PREFIX  text pasted in front of the function names (empty in every use
 *           in this file)
 *
 * The function stored at column n is PREFIX PFX SIZE _mcXY_ CPU with
 * n = X + 4 * Y, e.g. column 1 is mc10 and column 4 is mc01.
 *
 * All name arguments are operands of ##, so they are pasted as written and
 * are not macro-expanded first.
 */
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
do { \
    c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
} while (0)
  517. av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
  518. {
  519. int cpu_flags = av_get_cpu_flags();
  520. if (X86_MMXEXT(cpu_flags)) {
  521. #if HAVE_MMXEXT_EXTERNAL
  522. SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
  523. SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
  524. SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
  525. SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
  526. SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
  527. SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
  528. #endif /* HAVE_MMXEXT_EXTERNAL */
  529. }
  530. }