You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

104 lines
3.6KB

/*
 * Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
  20. #include "config.h"
  21. #include "libavutil/cpu.h"
  22. #include "libavcodec/vorbisdsp.h"
  23. #include "dsputil_mmx.h" // for ff_pdw_80000000
  24. #if HAVE_INLINE_ASM
  25. #if ARCH_X86_32
  26. static void vorbis_inverse_coupling_3dnow(float *mag, float *ang,
  27. intptr_t blocksize)
  28. {
  29. int i;
  30. __asm__ volatile ("pxor %%mm7, %%mm7":);
  31. for (i = 0; i < blocksize; i += 2) {
  32. __asm__ volatile (
  33. "movq %0, %%mm0 \n\t"
  34. "movq %1, %%mm1 \n\t"
  35. "movq %%mm0, %%mm2 \n\t"
  36. "movq %%mm1, %%mm3 \n\t"
  37. "pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0
  38. "pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0
  39. "pslld $31, %%mm2 \n\t" // keep only the sign bit
  40. "pxor %%mm2, %%mm1 \n\t"
  41. "movq %%mm3, %%mm4 \n\t"
  42. "pand %%mm1, %%mm3 \n\t"
  43. "pandn %%mm1, %%mm4 \n\t"
  44. "pfadd %%mm0, %%mm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
  45. "pfsub %%mm4, %%mm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
  46. "movq %%mm3, %1 \n\t"
  47. "movq %%mm0, %0 \n\t"
  48. : "+m"(mag[i]), "+m"(ang[i])
  49. :: "memory"
  50. );
  51. }
  52. __asm__ volatile ("femms");
  53. }
  54. #endif
  55. static void vorbis_inverse_coupling_sse(float *mag, float *ang,
  56. intptr_t blocksize)
  57. {
  58. int i;
  59. __asm__ volatile (
  60. "movaps %0, %%xmm5 \n\t"
  61. :: "m"(ff_pdw_80000000[0])
  62. );
  63. for (i = 0; i < blocksize; i += 4) {
  64. __asm__ volatile (
  65. "movaps %0, %%xmm0 \n\t"
  66. "movaps %1, %%xmm1 \n\t"
  67. "xorps %%xmm2, %%xmm2 \n\t"
  68. "xorps %%xmm3, %%xmm3 \n\t"
  69. "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
  70. "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
  71. "andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit
  72. "xorps %%xmm2, %%xmm1 \n\t"
  73. "movaps %%xmm3, %%xmm4 \n\t"
  74. "andps %%xmm1, %%xmm3 \n\t"
  75. "andnps %%xmm1, %%xmm4 \n\t"
  76. "addps %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
  77. "subps %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
  78. "movaps %%xmm3, %1 \n\t"
  79. "movaps %%xmm0, %0 \n\t"
  80. : "+m"(mag[i]), "+m"(ang[i])
  81. :: "memory"
  82. );
  83. }
  84. }
  85. #endif
  86. void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
  87. {
  88. #if HAVE_INLINE_ASM
  89. int mm_flags = av_get_cpu_flags();
  90. #if ARCH_X86_32
  91. if (mm_flags & AV_CPU_FLAG_3DNOW)
  92. dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
  93. #endif /* ARCH_X86_32 */
  94. if (mm_flags & AV_CPU_FLAG_SSE)
  95. dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
  96. #endif /* HAVE_INLINE_ASM */
  97. }