You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

136 lines
3.8KB

  1. ;*****************************************************************************
  2. ;* x86-optimized AC-3 DSP utils
  3. ;* Copyright (c) 2011 Justin Ruggles
  4. ;*
  5. ;* This file is part of FFmpeg.
  6. ;*
  7. ;* FFmpeg is free software; you can redistribute it and/or
  8. ;* modify it under the terms of the GNU Lesser General Public
  9. ;* License as published by the Free Software Foundation; either
  10. ;* version 2.1 of the License, or (at your option) any later version.
  11. ;*
  12. ;* FFmpeg is distributed in the hope that it will be useful,
  13. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. ;* Lesser General Public License for more details.
  16. ;*
  17. ;* You should have received a copy of the GNU Lesser General Public
  18. ;* License along with FFmpeg; if not, write to the Free Software
  19. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. ;******************************************************************************
  21. %include "x86inc.asm"
  22. %include "x86util.asm"
  23. SECTION .text
  ;-----------------------------------------------------------------------------
  ; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
  ;
  ; For every exponent position i, stores into exp[i] the unsigned byte minimum
  ; of exp[i + 256*blk] for blk = 0..num_reuse_blocks (block rows are 256 bytes
  ; apart). Returns without touching memory when num_reuse_blocks is 0.
  ;
  ; mmsize bytes are handled per outer iteration and the outer loop is
  ; do-while shaped, so at least one vector is processed and up to mmsize-1
  ; bytes past nb_coefs may be read and written.
  ; NOTE(review): loads/stores use mova, so exp is presumably required to be
  ; mmsize-aligned -- confirm against the callers.
  ;
  ; Macro argument:
  ;   %1  suffix of the emitted symbol (ac3_exponent_min_%1)
  ; Expected to be %defined before instantiation:
  ;   PMINUB      3-operand unsigned byte minimum (dst, src, scratch)
  ;   LOOP_ALIGN  alignment directive placed before each loop head (may be empty)
  ;
  ; Both int arguments are accessed through their 32-bit register names. The
  ; x86-64 calling conventions leave the upper 32 bits of an int argument
  ; register undefined, so 64-bit arithmetic or flag tests on them are unsafe.
  ;-----------------------------------------------------------------------------
  %macro AC3_EXPONENT_MIN 1
  cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
      ; reuse_blks *= 256 (byte offset of the last block row). The 32-bit form
      ; sets ZF from the int value only and zero-extends into reuse_blksq, so
      ; the 64-bit copy into offsetq below is well defined.
      shl   reuse_blksd, 8
      jz .end
      LOOP_ALIGN
  .nextexp:
      ; start from the last block row and walk back towards row 0
      mov       offsetq, reuse_blksq
      mova           m0, [expq+offsetq]
      sub       offsetq, 256
      LOOP_ALIGN
  .nextblk:
      PMINUB         m0, [expq+offsetq], m1
      sub       offsetq, 256
      jae .nextblk                    ; borrow (CF) is set once row 0 was folded in
      mova       [expq], m0
      add          expq, mmsize
      ; 32-bit subtract: the signed jg test must only see the int argument
      sub         expnd, mmsize
      jg .nextexp
  .end:
      REP_RET
  %endmacro
  ; Instantiate ff_ac3_exponent_min_{mmx,mmxext,sse2}.
  ; PMINUB and LOOP_ALIGN are consumed by AC3_EXPONENT_MIN when it is expanded,
  ; so they are set up ahead of each instantiation and dropped at the end.

  ; mmx: always assembled, no loop alignment padding.
  INIT_MMX
  %define LOOP_ALIGN
  %define PMINUB PMINUB_MMX
  AC3_EXPONENT_MIN mmx

  ; mmxext: only when HAVE_MMX2 is defined; loop heads are aligned to 16 bytes.
  %ifdef HAVE_MMX2
  %define LOOP_ALIGN ALIGN 16
  %define PMINUB PMINUB_MMXEXT
  AC3_EXPONENT_MIN mmxext
  %endif

  ; sse2: only when HAVE_SSE is defined. It does not define PMINUB/LOOP_ALIGN
  ; itself and therefore inherits whichever definitions are current here
  ; (the mmxext ones if HAVE_MMX2 is defined, otherwise the mmx ones).
  %ifdef HAVE_SSE
  INIT_XMM
  AC3_EXPONENT_MIN sse2
  %endif

  %undef LOOP_ALIGN
  %undef PMINUB
  ;-----------------------------------------------------------------------------
  ; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len)
  ;
  ; Returns a 16-bit value (in eax, upper bits cleared) built from the absolute
  ; values of the input words. Two strategies are available, selected by %2:
  ;   or_abs  - bitwise OR of the absolute value of every element.
  ;             Chosen for ssse3 (pabsw is available) and for mmx (which has
  ;             no packed min/max instructions).
  ;   min_max - track the signed minimum and maximum of the array, then
  ;             return abs(min) | abs(max).
  ;             Chosen for mmxext and sse2 (pminsw/pmaxsw are available).
  ; The two strategies need not return the same number; either one is a valid
  ; result for this function.
  ;
  ; Each iteration consumes two vectors (2*mmsize bytes = mmsize int16 values)
  ; and decrements len by mmsize. The loop is do-while shaped, so one iteration
  ; always runs.
  ; NOTE(review): len is presumably a positive multiple of mmsize and src
  ; suitably aligned for mova / pabsw memory operands -- confirm with callers.
  ;
  ; Macro arguments:
  ;   %1  symbol suffix (ac3_max_msb_abs_int16_%1); the value "mmx" also
  ;       selects the ABS2-based or_abs path instead of pabsw
  ;   %2  "min_max" for the min/max strategy, anything else means or_abs
  ; Expected to be %defined before instantiation:
  ;   ABS2     packed-word absolute value of two registers using two scratch
  ;            registers (presumably; defined outside this file section)
  ;   PSHUFLW  word shuffle on the low 64 bits (pshufw / pshuflw)
  ;-----------------------------------------------------------------------------
  %macro AC3_MAX_MSB_ABS_INT16 2
  cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
      ; m2 = running OR (or_abs) / running minimum (min_max)
      ; m3 = running maximum (min_max) / ABS2 scratch (mmx or_abs)
      pxor        m3, m3
      pxor        m2, m2
  .next_pair:
  %ifidn %2, min_max
      mova        m0, [srcq]
      mova        m1, [srcq+mmsize]
      pminsw      m2, m0
      pmaxsw      m3, m0
      pminsw      m2, m1
      pmaxsw      m3, m1
  %else ; or_abs
  %ifidn %1, mmx
      mova        m0, [srcq]
      mova        m1, [srcq+mmsize]
      ABS2        m0, m1, m3, m4
  %else ; ssse3
      ; memory operands are the faster form for ssse3
      pabsw       m0, [srcq]
      pabsw       m1, [srcq+mmsize]
  %endif
      por         m2, m1
      por         m2, m0
  %endif
      add       srcq, 2*mmsize
      sub       lend, mmsize
      ja .next_pair

  %ifidn %2, min_max
      ; fold the two extremes into a single vector of magnitudes
      ABS2        m2, m3, m0, m1
      por         m2, m3
  %endif

      ; horizontal OR: collapse every word lane of m2 into word 0
  %if mmsize == 16
      movhlps     m0, m2              ; high 64 bits -> low 64 bits
      por         m2, m0
  %endif
      PSHUFLW     m0, m2, 0x0e        ; words 2,3 -> words 0,1
      por         m2, m0
      PSHUFLW     m0, m2, 0x01        ; word 1 -> word 0
      por         m2, m0
      movd       eax, m2
      and        eax, 0xffff          ; keep only the reduced word
      RET
  %endmacro
  ; Instantiate ff_ac3_max_msb_abs_int16_{mmx,mmxext,sse2,ssse3}.
  ; ABS2 and PSHUFLW are read by the macro when it is expanded; every %define
  ; below stays in effect for the later instantiations until it is overridden.

  ; MMX register set
  INIT_MMX
  %define PSHUFLW pshufw
  %define ABS2 ABS2_MMX
  AC3_MAX_MSB_ABS_INT16 mmx, or_abs

  %define ABS2 ABS2_MMX2
  AC3_MAX_MSB_ABS_INT16 mmxext, min_max

  ; XMM register set; the sse2 version keeps ABS2_MMX2 from above
  INIT_XMM
  %define PSHUFLW pshuflw
  AC3_MAX_MSB_ABS_INT16 sse2, min_max

  ; The ssse3/or_abs expansion uses pabsw directly and never references ABS2;
  ; the definition is kept unchanged for anything that follows.
  %define ABS2 ABS2_SSSE3
  AC3_MAX_MSB_ABS_INT16 ssse3, or_abs