You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

188 lines
5.3KB

  1. ;*****************************************************************************
  2. ;* x86-optimized AC-3 downmixing
  3. ;* Copyright (c) 2012 Justin Ruggles
  4. ;*
  5. ;* This file is part of Libav.
  6. ;*
  7. ;* Libav is free software; you can redistribute it and/or
  8. ;* modify it under the terms of the GNU Lesser General Public
  9. ;* License as published by the Free Software Foundation; either
  10. ;* version 2.1 of the License, or (at your option) any later version.
  11. ;*
  12. ;* Libav is distributed in the hope that it will be useful,
  13. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. ;* Lesser General Public License for more details.
  16. ;*
  17. ;* You should have received a copy of the GNU Lesser General Public
  18. ;* License along with Libav; if not, write to the Free Software
  19. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. ;******************************************************************************
  21. ;******************************************************************************
  22. ;* This is based on the channel mixing asm in libavresample, but it is
  23. ;* simplified for only float coefficients and only 3 to 6 channels.
  24. ;******************************************************************************
  25. %include "libavutil/x86/x86util.asm"
  26. SECTION .text
  27. ;-----------------------------------------------------------------------------
  28. ; functions to downmix 3, 4, 5 or 6 input channels to mono or stereo
  29. ; void ff_ac3_downmix_*(float **samples, float **matrix, int len);
  30. ;-----------------------------------------------------------------------------
  31. %macro AC3_DOWNMIX 2 ; %1 = in channels, %2 = out channels
      ; Emits one function named ac3_downmix_<%1>_to_<%2> (via cglobal).
      ; The function multiplies every input channel by a per-channel
      ; coefficient, sums the products, and writes the result back over
      ; channel 0 (and channel 1 for two outputs), i.e. it works in place.
      ; The vector width (mmsize) and instruction set are whatever is active
      ; when the macro is invoked.
      ; NOTE(review): cglobal, VBROADCASTSS, FMULADD_PS, mova, num_mmregs,
      ; mmsize and gprsize are not defined in this file; presumably they come
      ; from the included x86util.asm. Their exact semantics should be
      ; confirmed there.
  32. ; define some names to make the code clearer
  33. %assign in_channels %1
  34. %assign out_channels %2
      ; stereo is 0 for one output channel and non-zero for two
  35. %assign stereo out_channels - 1
  36. ; determine how many matrix elements must go on the stack vs. mmregs
  37. %assign matrix_elements in_channels * out_channels
      ; needed_mmregs = vector registers the loop itself uses:
      ; m0..m3 for stereo (two accumulators, one load, one scratch),
      ; m0..m2 for mono
  38. %if stereo
  39. %assign needed_mmregs 4
  40. %else
  41. %assign needed_mmregs 3
  42. %endif
      ; coefficients kept in registers = registers left over, capped at the
      ; number of coefficients that actually exist
  43. %assign matrix_elements_mm num_mmregs - needed_mmregs
  44. %if matrix_elements < matrix_elements_mm
  45. %assign matrix_elements_mm matrix_elements
  46. %endif
  47. %assign total_mmregs needed_mmregs+matrix_elements_mm
      ; whatever does not fit in registers gets one mmsize-wide stack slot each
  48. %if matrix_elements_mm < matrix_elements
  49. %assign matrix_elements_stack matrix_elements - matrix_elements_mm
  50. %else
  51. %assign matrix_elements_stack 0
  52. %endif
      ; 3 arguments, in_channels+1 general-purpose registers, total_mmregs
      ; vector registers, and a stack area of matrix_elements_stack*mmsize.
      ; NOTE(review): the meaning of the "0-" prefix on the stack size is
      ; defined by cglobal, not by this file -- confirm in x86inc.
  53. cglobal ac3_downmix_%1_to_%2, 3,in_channels+1,total_mmregs,0-matrix_elements_stack*mmsize, src0, src1, len, src2, src3, src4, src5
  54. ; load matrix pointers
      ; r1 arrives holding the matrix argument, an array of row pointers;
      ; r3 is borrowed for the second row. Both registers are free to be
      ; overwritten by the channel-pointer loads further down, because the
      ; coefficients are fully loaded before that point.
  55. %define matrix0q r1q
  56. %define matrix1q r3q
  57. %if stereo
  58. mov matrix1q, [matrix0q+gprsize]
  59. %endif
      ; must come after the row-1 load above: this overwrites the array pointer
  60. mov matrix0q, [matrix0q]
  61. ; define matrix coeff names
      ; For output row R and input channel i:
      ;   mx_R_i       = operand holding the splatted coefficient
      ;                  (a vector register, or a [rsp+...] stack slot)
      ;   mx_stack_R_i = 1 if that operand is a stack slot, 0 if a register
      ; Coefficients are numbered row 0 first, then row 1; the first
      ; matrix_elements_mm of them get registers starting at m<needed_mmregs>.
  62. %assign %%i 0
  63. %assign %%j needed_mmregs
  64. %rep in_channels
  65. %if %%i >= matrix_elements_mm
  66. CAT_XDEFINE mx_stack_0_, %%i, 1
  67. CAT_XDEFINE mx_0_, %%i, [rsp+(%%i-matrix_elements_mm)*mmsize]
  68. %else
  69. CAT_XDEFINE mx_stack_0_, %%i, 0
  70. CAT_XDEFINE mx_0_, %%i, m %+ %%j
  71. %assign %%j %%j+1
  72. %endif
  73. %assign %%i %%i+1
  74. %endrep
      ; row 1 continues the numbering at index in_channels and keeps using %%j
      ; as the next free vector register
  75. %if stereo
  76. %assign %%i 0
  77. %rep in_channels
  78. %if in_channels + %%i >= matrix_elements_mm
  79. CAT_XDEFINE mx_stack_1_, %%i, 1
  80. CAT_XDEFINE mx_1_, %%i, [rsp+(in_channels+%%i-matrix_elements_mm)*mmsize]
  81. %else
  82. CAT_XDEFINE mx_stack_1_, %%i, 0
  83. CAT_XDEFINE mx_1_, %%i, m %+ %%j
  84. %assign %%j %%j+1
  85. %endif
  86. %assign %%i %%i+1
  87. %endrep
  88. %endif
  89. ; load/splat matrix coeffs
      ; Each coefficient is read from row[i] (4 bytes apart). Register-resident
      ; ones are loaded directly; stack-resident ones go through m0 and are then
      ; stored to their slot.
      ; NOTE(review): VBROADCASTSS presumably replicates the scalar across the
      ; whole vector -- defined outside this file.
  90. %assign %%i 0
  91. %rep in_channels
  92. %if mx_stack_0_ %+ %%i
  93. VBROADCASTSS m0, [matrix0q+4*%%i]
  94. mova mx_0_ %+ %%i, m0
  95. %else
  96. VBROADCASTSS mx_0_ %+ %%i, [matrix0q+4*%%i]
  97. %endif
  98. %if stereo
  99. %if mx_stack_1_ %+ %%i
  100. VBROADCASTSS m0, [matrix1q+4*%%i]
  101. mova mx_1_ %+ %%i, m0
  102. %else
  103. VBROADCASTSS mx_1_ %+ %%i, [matrix1q+4*%%i]
  104. %endif
  105. %endif
  106. %assign %%i %%i+1
  107. %endrep
      ; lenq = 4 * len, i.e. the length scaled by the same 4-byte element size
      ; used for the coefficient reads above
  108. lea lenq, [4*r2d]
  109. ; load channel pointers to registers
      ; Every channel pointer is advanced to the END of its buffer
      ; (pointer + lenq); together with the negated lenq below, [ptr+lenq]
      ; then walks from the start of the buffer towards offset 0.
      ; src0q still holds the pointer array here, so it is converted last.
  110. %assign %%i 1
  111. %rep (in_channels - 1)
  112. mov src %+ %%i %+ q, [src0q+%%i*gprsize]
  113. add src %+ %%i %+ q, lenq
  114. %assign %%i %%i+1
  115. %endrep
  116. mov src0q, [src0q]
  117. add src0q, lenq
  118. neg lenq
      ; Main loop: one vector (mmsize bytes) of every channel per iteration.
      ; m0 accumulates output 0, m1 accumulates output 1 (stereo only).
  119. .loop:
      ; channel 0 initialises the accumulators with a plain multiply.
      ; The sample vector is loaded into m0 only when it is needed twice
      ; (stereo) or when the coefficient is a memory operand; otherwise the
      ; sample is used directly as the memory operand of mulps.
  120. %if stereo || mx_stack_0_0
  121. mova m0, [src0q+lenq]
  122. %endif
  123. %if stereo
  124. mulps m1, m0, mx_1_0
  125. %endif
  126. %if stereo || mx_stack_0_0
  127. mulps m0, m0, mx_0_0
  128. %else
  129. mulps m0, mx_0_0, [src0q+lenq]
  130. %endif
      ; remaining channels: accumulate sample * coefficient into m0 / m1.
      ; NOTE(review): FMULADD_PS dst, a, b, c, tmp presumably computes
      ; dst = a*b + c and may use tmp as scratch -- defined outside this file.
  131. %assign %%i 1
  132. %rep (in_channels - 1)
  133. %define src_ptr src %+ %%i %+ q
  134. ; avoid extra load for mono if matrix is in a mm register
  135. %if stereo || mx_stack_0_ %+ %%i
  136. mova m2, [src_ptr+lenq]
  137. %endif
      ; output 1 is updated first because the output-0 update below passes m2
      ; as its scratch operand, so the loaded samples may not survive it
  138. %if stereo
  139. FMULADD_PS m1, m2, mx_1_ %+ %%i, m1, m3
  140. %endif
  141. %if stereo || mx_stack_0_ %+ %%i
  142. FMULADD_PS m0, m2, mx_0_ %+ %%i, m0, m2
  143. %else
      ; mono with the coefficient in a register: the sample is the memory
      ; operand, and m1 (unused as an accumulator in mono) is the scratch
  144. FMULADD_PS m0, mx_0_ %+ %%i, [src_ptr+lenq], m0, m1
  145. %endif
  146. %assign %%i %%i+1
  147. %endrep
      ; write the results back over the input: output 0 replaces channel 0,
      ; output 1 (stereo) replaces channel 1
  148. mova [src0q+lenq], m0
  149. %if stereo
  150. mova [src1q+lenq], m1
  151. %endif
      ; lenq counts up from -4*len in steps of mmsize; keep looping while it is
      ; still negative. The body always runs at least once.
  152. add lenq, mmsize
  153. jl .loop
  154. RET
  155. %endmacro
      ; Instantiates AC3_DOWNMIX for every supported combination:
      ; input channel counts 3, 4, 5 and 6 (%%i starts at 3, four repetitions),
      ; each with 1 and 2 output channels, once per instruction-set setup.
      ; NOTE(review): INIT_XMM / INIT_YMM are defined outside this file;
      ; presumably they select the vector width and CPU feature set used by
      ; the functions emitted after them.
  156. %macro AC3_DOWNMIX_FUNCS 0
  157. %assign %%i 3
  158. %rep 4
  159. INIT_XMM sse
  160. AC3_DOWNMIX %%i, 1
  161. AC3_DOWNMIX %%i, 2
  162. INIT_YMM avx
  163. AC3_DOWNMIX %%i, 1
  164. AC3_DOWNMIX %%i, 2
      ; the fma3 variants are emitted only when HAVE_FMA3_EXTERNAL is non-zero
  165. %if HAVE_FMA3_EXTERNAL
  166. INIT_YMM fma3
  167. AC3_DOWNMIX %%i, 1
  168. AC3_DOWNMIX %%i, 2
  169. %endif
  170. %assign %%i %%i+1
  171. %endrep
  172. %endmacro
      ; expand the macro once so the functions are actually emitted
  173. AC3_DOWNMIX_FUNCS