You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

135 lines
3.8KB

  1. /*
  2. * This file is part of Libav.
  3. *
  4. * Libav is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * Libav is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with Libav; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "libavutil/attributes.h"
  19. #include "libavutil/cpu.h"
  20. #include "libavutil/x86/cpu.h"
  21. #include "libavcodec/avcodec.h"
  22. #include "libavcodec/mpegvideoencdsp.h"
  23. int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
  24. int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
  25. #if HAVE_INLINE_ASM
  26. #define PHADDD(a, t) \
  27. "movq " #a ", " #t " \n\t" \
  28. "psrlq $32, " #a " \n\t" \
  29. "paddd " #t ", " #a " \n\t"
  30. /*
  31. * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
  32. * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
  33. * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
  34. */
  35. #define PMULHRW(x, y, s, o) \
  36. "pmulhw " #s ", " #x " \n\t" \
  37. "pmulhw " #s ", " #y " \n\t" \
  38. "paddw " #o ", " #x " \n\t" \
  39. "paddw " #o ", " #y " \n\t" \
  40. "psraw $1, " #x " \n\t" \
  41. "psraw $1, " #y " \n\t"
  42. #define DEF(x) x ## _mmx
  43. #define SET_RND MOVQ_WONE
  44. #define SCALE_OFFSET 1
  45. #include "mpegvideoenc_qns_template.c"
  46. #undef DEF
  47. #undef SET_RND
  48. #undef SCALE_OFFSET
  49. #undef PMULHRW
  50. #define DEF(x) x ## _3dnow
  51. #define SET_RND(x)
  52. #define SCALE_OFFSET 0
  53. #define PMULHRW(x, y, s, o) \
  54. "pmulhrw " #s ", " #x " \n\t" \
  55. "pmulhrw " #s ", " #y " \n\t"
  56. #include "mpegvideoenc_qns_template.c"
  57. #undef DEF
  58. #undef SET_RND
  59. #undef SCALE_OFFSET
  60. #undef PMULHRW
  61. #if HAVE_SSSE3_INLINE
  62. #undef PHADDD
  63. #define DEF(x) x ## _ssse3
  64. #define SET_RND(x)
  65. #define SCALE_OFFSET -1
  66. #define PHADDD(a, t) \
  67. "pshufw $0x0E, " #a ", " #t " \n\t" \
  68. /* faster than phaddd on core2 */ \
  69. "paddd " #t ", " #a " \n\t"
  70. #define PMULHRW(x, y, s, o) \
  71. "pmulhrsw " #s ", " #x " \n\t" \
  72. "pmulhrsw " #s ", " #y " \n\t"
  73. #include "mpegvideoenc_qns_template.c"
  74. #undef DEF
  75. #undef SET_RND
  76. #undef SCALE_OFFSET
  77. #undef PMULHRW
  78. #undef PHADDD
  79. #endif /* HAVE_SSSE3_INLINE */
  80. #endif /* HAVE_INLINE_ASM */
  81. av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
  82. AVCodecContext *avctx)
  83. {
  84. int cpu_flags = av_get_cpu_flags();
  85. if (EXTERNAL_MMX(cpu_flags)) {
  86. c->pix_sum = ff_pix_sum16_mmx;
  87. c->pix_norm1 = ff_pix_norm1_mmx;
  88. }
  89. #if HAVE_INLINE_ASM
  90. if (INLINE_MMX(cpu_flags)) {
  91. if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
  92. c->try_8x8basis = try_8x8basis_mmx;
  93. }
  94. c->add_8x8basis = add_8x8basis_mmx;
  95. }
  96. if (INLINE_AMD3DNOW(cpu_flags)) {
  97. if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
  98. c->try_8x8basis = try_8x8basis_3dnow;
  99. }
  100. c->add_8x8basis = add_8x8basis_3dnow;
  101. }
  102. #if HAVE_SSSE3_INLINE
  103. if (INLINE_SSSE3(cpu_flags)) {
  104. if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
  105. c->try_8x8basis = try_8x8basis_ssse3;
  106. }
  107. c->add_8x8basis = add_8x8basis_ssse3;
  108. }
  109. #endif /* HAVE_SSSE3_INLINE */
  110. #endif /* HAVE_INLINE_ASM */
  111. }