You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

126 lines
3.6KB

  1. /*
  2. * This file is part of FFmpeg.
  3. *
  4. * FFmpeg is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * FFmpeg is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with FFmpeg; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "libavutil/attributes.h"
  19. #include "libavutil/cpu.h"
  20. #include "libavutil/x86/cpu.h"
  21. #include "libavcodec/avcodec.h"
  22. #include "libavcodec/mpegvideoencdsp.h"
  23. #if HAVE_INLINE_ASM
  /*
   * PHADDD(a, t): baseline horizontal add of the two 32-bit halves of a.
   * The emitted asm copies a into t, shifts a right by 32 bits and then adds
   * t back into a with paddd, so the low dword of a ends up holding the sum of
   * the low and high dwords it held before. t is overwritten (scratch).
   * Both arguments are stringified into the asm text; presumably they are
   * register names supplied by mpegvideoenc_qns_template.c -- verify there.
   */
  24. #define PHADDD(a, t) \
  25. "movq " #a ", " #t " \n\t" \
  26. "psrlq $32, " #a " \n\t" \
  27. "paddd " #t ", " #a " \n\t"
  28. /*
  29. * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
  30. * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
  31. * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
  32. */
  /*
   * PMULHRW(x, y, s, o), plain-MMX flavour: emulates a rounding high multiply
   * with instructions that do not round by themselves. x and y are each
   * multiplied by s with pmulhw (keeping bits 16-31 of the product), o is
   * added with paddw, and the result is shifted right arithmetically by one.
   * x and y are updated in place; s and o are source operands only.
   * This definition is in effect for the first template include that follows
   * and is #undef'ed right after it.
   */
  33. #define PMULHRW(x, y, s, o) \
  34. "pmulhw " #s ", " #x " \n\t" \
  35. "pmulhw " #s ", " #y " \n\t" \
  36. "paddw " #o ", " #x " \n\t" \
  37. "paddw " #o ", " #y " \n\t" \
  38. "psraw $1, " #x " \n\t" \
  39. "psraw $1, " #y " \n\t"
  /*
   * First instantiation of mpegvideoenc_qns_template.c.
   * DEF() appends the _mmx suffix to whatever name it wraps; the init
   * function in this file refers to try_8x8basis_mmx and add_8x8basis_mmx,
   * which are presumably the functions this include produces.
   * SET_RND is mapped to MOVQ_WONE, which is defined outside this file view
   * (presumably it loads the rounding term used as the "o" argument of
   * PMULHRW -- TODO confirm).
   * NOTE(review): SCALE_OFFSET 1 likely accounts for the extra psraw $1 in
   * the MMX PMULHRW; its actual use is in the template -- confirm there.
   * DEF, SET_RND, SCALE_OFFSET and PMULHRW are undefined afterwards so they
   * can be redefined for the next include; PHADDD is not undefined here and
   * therefore stays in effect.
   */
  40. #define DEF(x) x ## _mmx
  41. #define SET_RND MOVQ_WONE
  42. #define SCALE_OFFSET 1
  43. #include "mpegvideoenc_qns_template.c"
  44. #undef DEF
  45. #undef SET_RND
  46. #undef SCALE_OFFSET
  47. #undef PMULHRW
  /*
   * Second instantiation of the template, with the _3dnow name suffix.
   * SET_RND(x) expands to nothing and PMULHRW ignores its "o" argument:
   * the pmulhrw instruction adds the 0x8000 rounding term itself (see the
   * formula comment ahead of the first PMULHRW definition), so only the two
   * multiplies by s are emitted, updating x and y in place.
   * NOTE(review): SCALE_OFFSET 0 matches pmulhrw keeping bits 16-31 with no
   * further shift; how the template consumes it is not visible here.
   * All four helper macros are undefined again after the include.
   */
  48. #define DEF(x) x ## _3dnow
  49. #define SET_RND(x)
  50. #define SCALE_OFFSET 0
  51. #define PMULHRW(x, y, s, o) \
  52. "pmulhrw " #s ", " #x " \n\t" \
  53. "pmulhrw " #s ", " #y " \n\t"
  54. #include "mpegvideoenc_qns_template.c"
  55. #undef DEF
  56. #undef SET_RND
  57. #undef SCALE_OFFSET
  58. #undef PMULHRW
  /*
   * Third instantiation of the template, with the _ssse3 name suffix.
   * Compiled only when HAVE_SSSE3_INLINE is true.
   * The baseline PHADDD is dropped first so it can be replaced below.
   * SET_RND(x) expands to nothing, as in the 3DNow! variant.
   * NOTE(review): SCALE_OFFSET -1 lines up with pmulhrsw keeping bits 15-30
   * (one bit lower than pmulhrw); the macro body is an unparenthesized
   * negative literal, so check how the template embeds it in expressions.
   */
  59. #if HAVE_SSSE3_INLINE
  60. #undef PHADDD
  61. #define DEF(x) x ## _ssse3
  62. #define SET_RND(x)
  63. #define SCALE_OFFSET -1
  /*
   * PHADDD(a, t) replacement: pshufw with selector 0x0E places the upper two
   * words of a into the lower two words of t, and paddd then adds t into a,
   * leaving low dword + high dword of the former a in the low dword of a.
   * Unlike the baseline version, a is not shifted beforehand. t is scratch.
   */
  64. #define PHADDD(a, t) \
  65. "pshufw $0x0E, " #a ", " #t " \n\t" \
  66. /* faster than phaddd on core2 */ \
  67. "paddd " #t ", " #a " \n\t"
  /*
   * PMULHRW(x, y, s, o): pmulhrsw performs the rounding itself, so "o" is
   * unused; x and y are each multiplied by s in place.
   */
  68. #define PMULHRW(x, y, s, o) \
  69. "pmulhrsw " #s ", " #x " \n\t" \
  70. "pmulhrsw " #s ", " #y " \n\t"
  71. #include "mpegvideoenc_qns_template.c"
  /* Leave no helper macro defined past this point, PHADDD included. */
  72. #undef DEF
  73. #undef SET_RND
  74. #undef SCALE_OFFSET
  75. #undef PMULHRW
  76. #undef PHADDD
  77. #endif /* HAVE_SSSE3_INLINE */
  78. #endif /* HAVE_INLINE_ASM */
  79. av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
  80. AVCodecContext *avctx)
  81. {
  82. #if HAVE_INLINE_ASM
  83. int cpu_flags = av_get_cpu_flags();
  84. if (INLINE_MMX(cpu_flags)) {
  85. if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
  86. c->try_8x8basis = try_8x8basis_mmx;
  87. }
  88. c->add_8x8basis = add_8x8basis_mmx;
  89. }
  90. if (INLINE_AMD3DNOW(cpu_flags)) {
  91. if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
  92. c->try_8x8basis = try_8x8basis_3dnow;
  93. }
  94. c->add_8x8basis = add_8x8basis_3dnow;
  95. }
  96. #if HAVE_SSSE3_INLINE
  97. if (INLINE_SSSE3(cpu_flags)) {
  98. if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
  99. c->try_8x8basis = try_8x8basis_ssse3;
  100. }
  101. c->add_8x8basis = add_8x8basis_ssse3;
  102. }
  103. #endif /* HAVE_SSSE3_INLINE */
  104. #endif /* HAVE_INLINE_ASM */
  105. }