You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

190 lines
8.8KB

  1. /*
  2. * MMX optimized DSP utils
  3. * Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #ifndef AVCODEC_X86_DSPUTIL_MMX_H
  22. #define AVCODEC_X86_DSPUTIL_MMX_H
  23. #include <stddef.h>
  24. #include <stdint.h>
  25. #include "libavcodec/dsputil.h"
  26. #include "libavutil/x86/asm.h"
  27. #include "constants.h"
  28. #define MOVQ_WONE(regd) \
  29. __asm__ volatile ( \
  30. "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
  31. "psrlw $15, %%" #regd ::)
  32. #define JUMPALIGN() __asm__ volatile (".p2align 3"::)
  33. #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
  34. #define MOVQ_BFE(regd) \
  35. __asm__ volatile ( \
  36. "pcmpeqd %%"#regd", %%"#regd" \n\t" \
  37. "paddb %%"#regd", %%"#regd" \n\t" ::)
  38. #ifndef PIC
  39. #define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))
  40. #else
  41. // for shared library it's better to use this way for accessing constants
  42. // pcmpeqd -> -1
  43. #define MOVQ_WTWO(regd) \
  44. __asm__ volatile ( \
  45. "pcmpeqd %%"#regd", %%"#regd" \n\t" \
  46. "psrlw $15, %%"#regd" \n\t" \
  47. "psllw $1, %%"#regd" \n\t"::)
  48. #endif
  49. // using regr as temporary and for the output result
  50. // first argument is unmodifed and second is trashed
  51. // regfe is supposed to contain 0xfefefefefefefefe
  52. #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
  53. "movq "#rega", "#regr" \n\t" \
  54. "pand "#regb", "#regr" \n\t" \
  55. "pxor "#rega", "#regb" \n\t" \
  56. "pand "#regfe", "#regb" \n\t" \
  57. "psrlq $1, "#regb" \n\t" \
  58. "paddb "#regb", "#regr" \n\t"
  59. #define PAVGB_MMX(rega, regb, regr, regfe) \
  60. "movq "#rega", "#regr" \n\t" \
  61. "por "#regb", "#regr" \n\t" \
  62. "pxor "#rega", "#regb" \n\t" \
  63. "pand "#regfe", "#regb" \n\t" \
  64. "psrlq $1, "#regb" \n\t" \
  65. "psubb "#regb", "#regr" \n\t"
  66. // mm6 is supposed to contain 0xfefefefefefefefe
  67. #define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
  68. "movq "#rega", "#regr" \n\t" \
  69. "movq "#regc", "#regp" \n\t" \
  70. "pand "#regb", "#regr" \n\t" \
  71. "pand "#regd", "#regp" \n\t" \
  72. "pxor "#rega", "#regb" \n\t" \
  73. "pxor "#regc", "#regd" \n\t" \
  74. "pand %%mm6, "#regb" \n\t" \
  75. "pand %%mm6, "#regd" \n\t" \
  76. "psrlq $1, "#regb" \n\t" \
  77. "psrlq $1, "#regd" \n\t" \
  78. "paddb "#regb", "#regr" \n\t" \
  79. "paddb "#regd", "#regp" \n\t"
  80. #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
  81. "movq "#rega", "#regr" \n\t" \
  82. "movq "#regc", "#regp" \n\t" \
  83. "por "#regb", "#regr" \n\t" \
  84. "por "#regd", "#regp" \n\t" \
  85. "pxor "#rega", "#regb" \n\t" \
  86. "pxor "#regc", "#regd" \n\t" \
  87. "pand %%mm6, "#regb" \n\t" \
  88. "pand %%mm6, "#regd" \n\t" \
  89. "psrlq $1, "#regd" \n\t" \
  90. "psrlq $1, "#regb" \n\t" \
  91. "psubb "#regb", "#regr" \n\t" \
  92. "psubb "#regd", "#regp" \n\t"
  93. void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
  94. void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
  95. void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
  96. void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
  97. void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
  98. void ff_clear_block_mmx(int16_t *block);
  99. void ff_clear_block_sse(int16_t *block);
  100. void ff_clear_blocks_mmx(int16_t *blocks);
  101. void ff_clear_blocks_sse(int16_t *blocks);
  102. void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w);
  103. void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top,
  104. const uint8_t *diff, int w,
  105. int *left, int *left_top);
  106. void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
  107. int w, int h, int sides);
  108. void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
  109. int stride, int h, int ox, int oy,
  110. int dxx, int dxy, int dyx, int dyy,
  111. int shift, int r, int width, int height);
  112. void ff_vector_clipf_sse(float *dst, const float *src,
  113. float min, float max, int len);
  114. void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
  115. ptrdiff_t line_size, int h);
  116. void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
  117. ptrdiff_t line_size, int h);
  118. void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
  119. ptrdiff_t line_size, int h);
  120. void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
  121. ptrdiff_t line_size, int h);
  122. void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
  123. ptrdiff_t line_size, int h);
  124. void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
  125. ptrdiff_t line_size, int h);
  126. void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
  127. ptrdiff_t line_size, int h);
  128. void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
  129. ptrdiff_t line_size, int h);
  130. void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
  131. ptrdiff_t line_size, int h);
  132. void ff_avg_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
  133. ptrdiff_t line_size, int h);
  134. void ff_avg_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
  135. ptrdiff_t line_size, int h);
  136. void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
  137. ptrdiff_t line_size, int h);
  138. void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
  139. ptrdiff_t line_size, int h);
  140. void ff_deinterlace_line_mmx(uint8_t *dst,
  141. const uint8_t *lum_m4, const uint8_t *lum_m3,
  142. const uint8_t *lum_m2, const uint8_t *lum_m1,
  143. const uint8_t *lum,
  144. int size);
  145. void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
  146. const uint8_t *lum_m3,
  147. const uint8_t *lum_m2,
  148. const uint8_t *lum_m1,
  149. const uint8_t *lum, int size);
  150. #define PIXELS16(STATIC, PFX1, PFX2, TYPE, CPUEXT) \
  151. STATIC void PFX1 ## _pixels16 ## TYPE ## CPUEXT(uint8_t *block, \
  152. const uint8_t *pixels, \
  153. ptrdiff_t line_size, \
  154. int h) \
  155. { \
  156. PFX2 ## PFX1 ## _pixels8 ## TYPE ## CPUEXT(block, pixels, \
  157. line_size, h); \
  158. PFX2 ## PFX1 ## _pixels8 ## TYPE ## CPUEXT(block + 8, pixels + 8, \
  159. line_size, h); \
  160. }
  161. #endif /* AVCODEC_X86_DSPUTIL_MMX_H */