You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

246 lines
6.7KB

  1. ;******************************************************************************
  2. ;* Copyright (c) 2012 Michael Niedermayer
  3. ;*
  4. ;* This file is part of FFmpeg.
  5. ;*
  6. ;* FFmpeg is free software; you can redistribute it and/or
  7. ;* modify it under the terms of the GNU Lesser General Public
  8. ;* License as published by the Free Software Foundation; either
  9. ;* version 2.1 of the License, or (at your option) any later version.
  10. ;*
  11. ;* FFmpeg is distributed in the hope that it will be useful,
  12. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. ;* Lesser General Public License for more details.
  15. ;*
  16. ;* You should have received a copy of the GNU Lesser General Public
  17. ;* License along with FFmpeg; if not, write to the Free Software
  18. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. ;******************************************************************************
  20. %include "libavutil/x86/x86inc.asm"
  21. %include "libavutil/x86/x86util.asm"
  SECTION_RODATA
  align 32

  ; Single-precision scale factors used by the float conversion macros below.
  ; Each constant is replicated 8 times (32 bytes) and the table is 32-byte
  ; aligned, so it can be fetched with one aligned load of up to 32 bytes.
  flt2pm31:
      times 8 dd 4.6566129e-10                  ; 2^-31
  flt2p31:
      times 8 dd 2147483648.0                   ; 2^31
  flt2p15:
      times 8 dd 32768.0                        ; 2^15

  SECTION .text
  ;------------------------------------------------------------------------------
  ; PACK_2CH to, from, a/u, log2_outsize, log2_insize [, convert [, init]]
  ;
  ; Emits the function  pack_2ch_<from>_to_<to>_<a|u>(dst, src, len)
  ; which interleaves two planar input channels into one packed output stream,
  ; optionally converting the sample format on the way.
  ;
  ; Macro parameters:
  ;   %1  output format name (only used to build the symbol name)
  ;   %2  input format name  (only used to build the symbol name)
  ;   %3  a or u: selects mova/movu for every sample load and store
  ;   %4  log2 of the output sample size in bytes
  ;   %5  log2 of the input sample size in bytes
  ;   %6  optional conversion macro, expanded once per loop iteration on the
  ;       interleaved registers (m0,m1, plus m2,m3 when %4 < %5)
  ;   %7  optional setup macro, expanded once before the loop
  ;
  ; Function arguments:
  ;   dst  pointer to a pointer table; only dst[0] is read
  ;   src  pointer to a pointer table; src[0] and src[1] are the two channels
  ;   len  sample count per channel
  ;
  ; The "a" variant checks all three data pointers for mmsize alignment and,
  ; if any of them is misaligned, jumps into the body of the matching "u"
  ; variant at its *_u_int label. That label lives in the "u" instantiation,
  ; so the "u" variant has to be instantiated for the same instruction set.
  ;
  ; The loop is bottom-tested and has no tail handling: it always runs at least
  ; once and consumes a whole batch of samples per iteration.
  ; NOTE(review): callers are presumably expected to pass a positive len that
  ; is a multiple of the batch size -- confirm against the C wrapper.
  ;------------------------------------------------------------------------------
  %macro PACK_2CH 5-7
  cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
      mov       src2q, [srcq+gprsize]           ; src2 = src[1]
      mov       srcq , [srcq]                   ; src  = src[0]
      mov       dstq , [dstq]                   ; dst  = dst[0]
  %ifidn %3, a
      ; Fall back to the unaligned body when any buffer is not mmsize-aligned.
      test      dstq, mmsize-1
      jne       pack_2ch_%2_to_%1_u_int %+ SUFFIX
      test      srcq, mmsize-1
      jne       pack_2ch_%2_to_%1_u_int %+ SUFFIX
      test      src2q, mmsize-1
      jne       pack_2ch_%2_to_%1_u_int %+ SUFFIX
  %else
  ; Shared entry point for the aligned variant's fallback. The trailing colon
  ; makes this an explicit label definition instead of an orphan identifier.
  pack_2ch_%2_to_%1_u_int %+ SUFFIX:
  %endif
      ; Point every buffer at its end and count len up from -len to 0, so one
      ; register serves as both index and loop counter.
      lea       srcq , [srcq  + (1<<%5)*lenq]
      lea       src2q, [src2q + (1<<%5)*lenq]
      lea       dstq , [dstq  + (2<<%4)*lenq]   ; two output samples per input index
      neg       lenq
      %7
  .next:
      ; Interleave one register of channel 0 with one register of channel 1.
      mov%3     m0, [         srcq +(1<<%5)*lenq]
      mova      m1, m0
      mov%3     m2, [         src2q+(1<<%5)*lenq]
  %if %5 == 1
      punpcklwd m0, m2
      punpckhwd m1, m2
  %else
      punpckldq m0, m2
      punpckhdq m1, m2
  %endif
  %if %4 < %5
      ; Narrowing conversion: the converter consumes four input registers, so
      ; interleave a second register pair as well.
      mov%3     m2, [mmsize + srcq +(1<<%5)*lenq]
      mova      m3, m2
      mov%3     m4, [mmsize + src2q+(1<<%5)*lenq]
      punpckldq m2, m4
      punpckhdq m3, m4
  %endif
      %6
      mov%3 [           dstq+(2<<%4)*lenq], m0
      mov%3 [  mmsize + dstq+(2<<%4)*lenq], m1
  %if %4 > %5
      ; Widening conversion: the converter produced four output registers.
      mov%3 [2*mmsize + dstq+(2<<%4)*lenq], m2
      mov%3 [3*mmsize + dstq+(2<<%4)*lenq], m3
      add       lenq, 4*mmsize/(2<<%4)
  %else
      add       lenq, 2*mmsize/(2<<%4)
  %endif
      jl        .next
      REP_RET
  %endmacro
  ;------------------------------------------------------------------------------
  ; CONV to, from, a/u, log2_outsize, log2_insize [, convert [, init]]
  ;
  ; Emits the function  <from>_to_<to>_<a|u>(dst, src, len)
  ; which converts one channel of samples from one format to another.
  ;
  ; Macro parameters:
  ;   %1  output format name (only used to build the symbol name)
  ;   %2  input format name  (only used to build the symbol name)
  ;   %3  a or u: selects mova/movu for every sample load and store
  ;   %4  log2 of the output sample size in bytes
  ;   %5  log2 of the input sample size in bytes
  ;   %6  optional conversion macro, expanded once per loop iteration; it
  ;       receives m0,m1 (plus m2,m3 when %4 < %5) and must leave its results
  ;       in m0,m1 (plus m2,m3 when %4 > %5)
  ;   %7  optional setup macro, expanded once before the loop
  ;
  ; Function arguments:
  ;   dst  pointer to a pointer table; only dst[0] is read
  ;   src  pointer to a pointer table; only src[0] is read
  ;   len  sample count
  ;
  ; The "a" variant checks both data pointers for mmsize alignment and, if
  ; either is misaligned, jumps into the body of the matching "u" variant at
  ; its *_u_int label. That label lives in the "u" instantiation, so the "u"
  ; variant has to be instantiated for the same instruction set.
  ;
  ; The loop is bottom-tested and has no tail handling: it always runs at least
  ; once and consumes a whole batch of samples per iteration.
  ; NOTE(review): callers are presumably expected to pass a positive len that
  ; is a multiple of the batch size -- confirm against the C wrapper.
  ;------------------------------------------------------------------------------
  %macro CONV 5-7
  cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
      mov       srcq, [srcq]                    ; src = src[0]
      mov       dstq, [dstq]                    ; dst = dst[0]
  %ifidn %3, a
      ; Fall back to the unaligned body when either buffer is not mmsize-aligned.
      test      dstq, mmsize-1
      jne       %2_to_%1_u_int %+ SUFFIX
      test      srcq, mmsize-1
      jne       %2_to_%1_u_int %+ SUFFIX
  %else
  ; Shared entry point for the aligned variant's fallback. The trailing colon
  ; makes this an explicit label definition instead of an orphan identifier.
  %2_to_%1_u_int %+ SUFFIX:
  %endif
      ; Point both buffers at their end and count len up from -len to 0, so one
      ; register serves as both index and loop counter.
      lea       srcq, [srcq + (1<<%5)*lenq]
      lea       dstq, [dstq + (1<<%4)*lenq]
      neg       lenq
      %7
  .next:
      mov%3     m0, [           srcq +(1<<%5)*lenq]
      mov%3     m1, [  mmsize + srcq +(1<<%5)*lenq]
  %if %4 < %5
      ; Narrowing conversion: four input registers collapse into two outputs.
      mov%3     m2, [2*mmsize + srcq +(1<<%5)*lenq]
      mov%3     m3, [3*mmsize + srcq +(1<<%5)*lenq]
  %endif
      %6
      mov%3 [           dstq+(1<<%4)*lenq], m0
      mov%3 [  mmsize + dstq+(1<<%4)*lenq], m1
  %if %4 > %5
      ; Widening conversion: two input registers expand into four outputs.
      mov%3 [2*mmsize + dstq+(1<<%4)*lenq], m2
      mov%3 [3*mmsize + dstq+(1<<%4)*lenq], m3
      add       lenq, 4*mmsize/(1<<%4)
  %else
      add       lenq, 2*mmsize/(1<<%4)
  %endif
      jl        .next
      REP_RET
  %endmacro
  ;------------------------------------------------------------------------------
  ; INT16_TO_INT32_N
  ; In:  m0, m1 = packed 16-bit samples
  ; Out: m0..m3 = the same samples widened to 32 bits, each placed in the upper
  ;      half of its dword (value << 16, low 16 bits zero).
  ;      m0,m1 come from the old m0; m2,m3 come from m1.
  ; Uses m4 as scratch. SWAP only renames registers at assembly time.
  ;------------------------------------------------------------------------------
  %macro INT16_TO_INT32_N 0
      ; Expand m1 first, while m0 still holds the other input.
      pxor      m2, m2
      punpcklwd m2, m1                          ; zero word below each sample
      pxor      m3, m3
      punpckhwd m3, m1
      ; Move the remaining input out of the way under the name m4.
      SWAP 4,0
      pxor      m0, m0
      punpcklwd m0, m4
      pxor      m1, m1
      punpckhwd m1, m4
  %endmacro
  ;------------------------------------------------------------------------------
  ; INT32_TO_INT16_N
  ; In:  m0..m3 = packed 32-bit samples
  ; Out: m0, m1 = the upper 16 bits of every input sample, in input order
  ;      (m0 from the old m0,m1; m1 from the old m2,m3).
  ;------------------------------------------------------------------------------
  %macro INT32_TO_INT16_N 0
      ; First output register: m0 | m1
      psrad     m0, 16
      psrad     m1, 16
      packssdw  m0, m1
      ; Second output register: m2 | m3, then renamed to m1
      psrad     m2, 16
      psrad     m3, 16
      packssdw  m2, m3
      SWAP 1,2
  %endmacro
  ;------------------------------------------------------------------------------
  ; INT32_TO_FLOAT_INIT / INT32_TO_FLOAT_N
  ; INIT loads the 2^-31 scale factor into m3; it must stay live for the loop.
  ; N converts the packed int32 samples in m0, m1 to float and scales them by m3.
  ;------------------------------------------------------------------------------
  %macro INT32_TO_FLOAT_INIT 0
      mova      m3, [flt2pm31]
  %endmacro

  %macro INT32_TO_FLOAT_N 0
      cvtdq2ps  m0, m0
      mulps     m0, m0, m3
      cvtdq2ps  m1, m1
      mulps     m1, m1, m3
  %endmacro
  ;------------------------------------------------------------------------------
  ; FLOAT_TO_INT32_INIT / FLOAT_TO_INT32_N
  ; INIT loads the 2^31 scale factor into m3; it must stay live for the loop.
  ; N scales the packed floats in m0, m1 by m3 and converts them to int32,
  ; leaving the results in m0, m1. Clobbers m2 and m4.
  ;
  ; cvtps2dq yields 0x80000000 for values that do not fit in int32. The compare
  ; produces an all-ones mask (-1) in lanes whose scaled value is not below
  ; 2^31; adding that mask turns 0x80000000 into 0x7FFFFFFF there.
  ;------------------------------------------------------------------------------
  %macro FLOAT_TO_INT32_INIT 0
      mova      m3, [flt2p31]
  %endmacro

  %macro FLOAT_TO_INT32_N 0
      ; first register
      mulps     m0, m3
      cvtps2dq  m2, m0
      cmpnltps  m0, m3                          ; -1 where scaled value >= 2^31
      paddd     m0, m2
      ; second register
      mulps     m1, m3
      cvtps2dq  m4, m1
      cmpnltps  m1, m3
      paddd     m1, m4
  %endmacro
  ;------------------------------------------------------------------------------
  ; INT16_TO_FLOAT_INIT / INT16_TO_FLOAT_N
  ; INIT loads the 2^-31 scale factor into m5; it must stay live for the loop.
  ; N widens the packed int16 samples in m0, m1 with INT16_TO_INT32_N (which
  ; leaves each sample shifted left by 16 in m0..m3), converts the four
  ; registers to float and scales them by m5.
  ;------------------------------------------------------------------------------
  %macro INT16_TO_FLOAT_INIT 0
      mova      m5, [flt2pm31]
  %endmacro

  %macro INT16_TO_FLOAT_N 0
      INT16_TO_INT32_N
      cvtdq2ps  m0, m0
      mulps     m0, m0, m5
      cvtdq2ps  m1, m1
      mulps     m1, m1, m5
      cvtdq2ps  m2, m2
      mulps     m2, m2, m5
      cvtdq2ps  m3, m3
      mulps     m3, m3, m5
  %endmacro
  ;------------------------------------------------------------------------------
  ; FLOAT_TO_INT16_INIT / FLOAT_TO_INT16_N
  ; INIT loads the 2^15 scale factor into m5; it must stay live for the loop.
  ; N scales the packed floats in m0..m3 by m5, converts them to int32 and
  ; packs them with signed saturation into int16:
  ;   m0 = pack(m0, m1), m1 = pack(m2, m3). m3 is clobbered.
  ;------------------------------------------------------------------------------
  %macro FLOAT_TO_INT16_INIT 0
      mova      m5, [flt2p15]
  %endmacro

  %macro FLOAT_TO_INT16_N 0
      ; first output register from m0, m1
      mulps     m0, m5
      mulps     m1, m5
      cvtps2dq  m0, m0
      cvtps2dq  m1, m1
      packssdw  m0, m1
      ; second output register from m2, m3; m1 is free again at this point
      mulps     m2, m5
      mulps     m3, m5
      cvtps2dq  m1, m2
      cvtps2dq  m3, m3
      packssdw  m1, m3
  %endmacro
  ;------------------------------------------------------------------------------
  ; Function instantiations.
  ; Every "u" variant is emitted before its "a" twin; the "a" variant jumps to
  ; the *_u_int label inside the "u" body when a buffer is misaligned.
  ;------------------------------------------------------------------------------

  ; --- MMX -----------------------------------------------------------------
  ; NOTE(review): these variants return through REP_RET without an emms in
  ; this file; presumably the caller clears the MMX state -- confirm.
  INIT_MMX mmx
  CONV     int32, int16, u, 2, 1, INT16_TO_INT32_N
  CONV     int32, int16, a, 2, 1, INT16_TO_INT32_N
  CONV     int16, int32, u, 1, 2, INT32_TO_INT16_N
  CONV     int16, int32, a, 1, 2, INT32_TO_INT16_N

  ; --- XMM, symbols suffixed _sse ----------------------------------------------
  ; NOTE(review): the integer instructions used here (pxor, punpck*, psrad,
  ; packssdw) need SSE2 when applied to XMM registers, yet the symbols carry
  ; the _sse suffix. The suffix is kept because callers reference these names;
  ; confirm that the C side only selects them when SSE2 is available.
  INIT_XMM sse
  CONV     int32, int16, u, 2, 1, INT16_TO_INT32_N
  CONV     int32, int16, a, 2, 1, INT16_TO_INT32_N
  CONV     int16, int32, u, 1, 2, INT32_TO_INT16_N
  CONV     int16, int32, a, 1, 2, INT32_TO_INT16_N

  PACK_2CH int16, int16, u, 1, 1
  PACK_2CH int16, int16, a, 1, 1
  PACK_2CH int32, int32, u, 2, 2
  PACK_2CH int32, int32, a, 2, 2
  PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N
  PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N
  PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N
  PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N

  ; --- XMM, symbols suffixed _sse2: conversions involving float ----------------
  INIT_XMM sse2
  CONV     float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  CONV     float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  CONV     int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  CONV     int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  CONV     float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  CONV     float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  CONV     int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
  CONV     int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

  PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
  PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

  ; --- YMM, symbols suffixed _avx: only built when the assembler supports AVX --
  %if HAVE_AVX
  INIT_YMM avx
  CONV     float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  CONV     float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  %endif