You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

105 lines
2.9KB

  1. ;******************************************************************************
  2. ;* x86 optimized Format Conversion Utils
  3. ;* Copyright (c) 2008 Loren Merritt
  4. ;*
  5. ;* This file is part of Libav.
  6. ;*
  7. ;* Libav is free software; you can redistribute it and/or
  8. ;* modify it under the terms of the GNU Lesser General Public
  9. ;* License as published by the Free Software Foundation; either
  10. ;* version 2.1 of the License, or (at your option) any later version.
  11. ;*
  12. ;* Libav is distributed in the hope that it will be useful,
  13. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. ;* Lesser General Public License for more details.
  16. ;*
  17. ;* You should have received a copy of the GNU Lesser General Public
  18. ;* License along with Libav; if not, write to the Free Software
  19. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. ;******************************************************************************
  21. %include "x86inc.asm"
  22. %include "x86util.asm"
  23. SECTION_TEXT
  ;-----------------------------------------------------------------------------
  ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
  ;                              int channels);
  ;
  ; Reads from the six buffers whose pointers are stored in src[0..5] and
  ; writes the shuffled result to dst, one register width per buffer per pass.
  ;
  ; Notes:
  ;   * Only dst, src and len are read; the channels argument is never
  ;     referenced by this code.
  ;   * Each pass advances the source by mmsize bytes, the destination by
  ;     6*mmsize bytes, and decrements len by mmsize/4.
  ;   * len is tested at the bottom of the loop, so one full pass is always
  ;     executed, whatever the value of len on entry.
  ;   * The SSE path stores with movaps, so dst must be 16-byte aligned there.
  ;     The loads go through mova (presumably the aligned move for the current
  ;     register size -- confirm against x86inc.asm).
  ;
  ; Lane diagrams in the comments below use a..f for the buffers src[0]..src[5]
  ; and a digit for the sample index. They assume that SBUTTERFLYPS x, y, t and
  ; SBUTTERFLY dq, x, y, t leave the interleaved low halves of (x, y) in x and
  ; the interleaved high halves in y, with t as scratch, and that q3210 is the
  ; identity shuffle selector -- TODO confirm against x86util.asm / x86inc.asm.
  ;-----------------------------------------------------------------------------
  %macro CONV_FLTP_TO_FLT_6CH 0
  cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, chan1, chan2, chan3, chan4, chan5, len
  %if ARCH_X86_64
      mov         lend, r2d               ; copy the third argument into the len register
  %else
      ; Use len straight from its argument slot in memory instead of a register
      ; (presumably because no eighth GPR is available on x86-32).
      %define lend dword r2m
  %endif

      ; Fetch the pointers for buffers 1..5 while srcq still addresses the
      ; pointer array, then replace srcq with the pointer for buffer 0.
      mov       chan1q, [srcq + 1*gprsize]
      mov       chan2q, [srcq + 2*gprsize]
      mov       chan3q, [srcq + 3*gprsize]
      mov       chan4q, [srcq + 4*gprsize]
      mov       chan5q, [srcq + 5*gprsize]
      mov         srcq, [srcq]

      ; Convert the five pointers into byte offsets relative to buffer 0, so
      ; that advancing srcq alone steps through all six buffers.
      sub       chan1q, srcq
      sub       chan2q, srcq
      sub       chan3q, srcq
      sub       chan4q, srcq
      sub       chan5q, srcq

  .loop:
      ; One register of samples from each of the six buffers.
      mova          m0, [srcq]            ; a
      mova          m1, [srcq + chan1q]   ; b
      mova          m2, [srcq + chan2q]   ; c
      mova          m3, [srcq + chan3q]   ; d
      mova          m4, [srcq + chan4q]   ; e
      mova          m5, [srcq + chan5q]   ; f

  %if cpuflag(sse)
      SBUTTERFLYPS   0, 1, 6              ; m0 = a0 b0 a1 b1   m1 = a2 b2 a3 b3
      SBUTTERFLYPS   2, 3, 6              ; m2 = c0 d0 c1 d1   m3 = c2 d2 c3 d3
      SBUTTERFLYPS   4, 5, 6              ; m4 = e0 f0 e1 f1   m5 = e2 f2 e3 f3

      ; Samples 0 and 1.
      movaps        m6, m4                ; m6 = e0 f0 e1 f1
      shufps        m4, m0, q3210         ; m4 = e0 f0 a1 b1
      movlhps       m0, m2                ; m0 = a0 b0 c0 d0
      movhlps       m6, m2                ; m6 = c1 d1 e1 f1

      ; Samples 2 and 3. m2 is no longer needed, so it serves as the temporary.
      movaps        m2, m5                ; m2 = e2 f2 e3 f3
      shufps        m5, m1, q3210         ; m5 = e2 f2 a3 b3
      movlhps       m1, m3                ; m1 = a2 b2 c2 d2
      movhlps       m2, m3                ; m2 = c3 d3 e3 f3

      movaps [dstq   ], m0
      movaps [dstq+16], m4
      movaps [dstq+32], m6
      movaps [dstq+48], m1
      movaps [dstq+64], m5
      movaps [dstq+80], m2
  %else ; mmx
      SBUTTERFLY    dq, 0, 1, 6           ; m0 = a0 b0   m1 = a1 b1
      SBUTTERFLY    dq, 2, 3, 6           ; m2 = c0 d0   m3 = c1 d1
      SBUTTERFLY    dq, 4, 5, 6           ; m4 = e0 f0   m5 = e1 f1

      movq   [dstq   ], m0
      movq   [dstq+ 8], m2
      movq   [dstq+16], m4
      movq   [dstq+24], m1
      movq   [dstq+32], m3
      movq   [dstq+40], m5
  %endif

      add         srcq, mmsize
      add         dstq, mmsize*6
      sub         lend, mmsize/4          ; must stay last: jg consumes its flags
      jg .loop

  %if mmsize != 8
      REP_RET
  %else
      emms                                ; leave MMX state before returning
      RET
  %endif
  %endmacro
  ; Expand the macro twice: once after INIT_MMX mmx and once after INIT_XMM sse.
  ; The macro body picks its shuffle/store path with %if cpuflag(sse) and its
  ; return sequence with a test on mmsize.
  INIT_MMX mmx
  CONV_FLTP_TO_FLT_6CH

  INIT_XMM sse
  CONV_FLTP_TO_FLT_6CH