You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

246 lines
5.7KB

  1. ;******************************************************************************
  2. ;* Copyright (c) 2012 Michael Niedermayer
  3. ;*
  4. ;* This file is part of FFmpeg.
  5. ;*
  6. ;* FFmpeg is free software; you can redistribute it and/or
  7. ;* modify it under the terms of the GNU Lesser General Public
  8. ;* License as published by the Free Software Foundation; either
  9. ;* version 2.1 of the License, or (at your option) any later version.
  10. ;*
  11. ;* FFmpeg is distributed in the hope that it will be useful,
  12. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. ;* Lesser General Public License for more details.
  15. ;*
  16. ;* You should have received a copy of the GNU Lesser General Public
  17. ;* License along with FFmpeg; if not, write to the Free Software
  18. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. ;******************************************************************************
  20. %include "libavutil/x86/x86inc.asm"
  21. %include "libavutil/x86/x86util.asm"
  22. SECTION_RODATA
  23. flt2pm31: times 8 dd 4.6566129e-10
  24. flt2p31 : times 8 dd 2147483648.0
  25. flt2p15 : times 8 dd 32768.0
  26. SECTION .text
  27. %macro INT16_TO_INT32 1
  28. cglobal int16_to_int32_%1, 3, 3, 3, dst, src, len
  29. mov srcq, [srcq]
  30. mov dstq, [dstq]
  31. %ifidn %1, a
  32. test dstq, mmsize-1
  33. jne int16_to_int32_u_int %+ SUFFIX
  34. test srcq, mmsize-1
  35. jne int16_to_int32_u_int %+ SUFFIX
  36. %else
  37. int16_to_int32_u_int %+ SUFFIX
  38. %endif
  39. add dstq, lenq
  40. shr lenq, 1
  41. add srcq, lenq
  42. neg lenq
  43. .next
  44. mov%1 m2, [srcq+lenq]
  45. pxor m0, m0
  46. pxor m1, m1
  47. punpcklwd m0, m2
  48. punpckhwd m1, m2
  49. mov%1 [ dstq+2*lenq], m0
  50. mov%1 [mmsize + dstq+2*lenq], m1
  51. add lenq, mmsize
  52. jl .next
  53. %if mmsize == 8
  54. emms
  55. %endif
  56. REP_RET
  57. %endmacro
  58. %macro INT32_TO_FLOAT 1
  59. cglobal int32_to_float_%1, 3, 3, 3, dst, src, len
  60. mov srcq, [srcq]
  61. mov dstq, [dstq]
  62. %ifidn %1, a
  63. test dstq, mmsize-1
  64. jne int32_to_float_u_int %+ SUFFIX
  65. test srcq, mmsize-1
  66. jne int32_to_float_u_int %+ SUFFIX
  67. %else
  68. int32_to_float_u_int %+ SUFFIX
  69. %endif
  70. add srcq, lenq
  71. add dstq, lenq
  72. neg lenq
  73. mova m2, [flt2pm31]
  74. .next:
  75. %ifidn %1, a
  76. cvtdq2ps m0, [ srcq+lenq]
  77. cvtdq2ps m1, [mmsize + srcq+lenq]
  78. %else
  79. movu m0, [ srcq+lenq]
  80. movu m1, [mmsize + srcq+lenq]
  81. cvtdq2ps m0, m0
  82. cvtdq2ps m1, m1
  83. %endif
  84. mulps m0, m2
  85. mulps m1, m2
  86. mov%1 [ dstq+lenq], m0
  87. mov%1 [mmsize + dstq+lenq], m1
  88. add lenq, 2*mmsize
  89. jl .next
  90. REP_RET
  91. %endmacro
  92. %macro INT16_TO_FLOAT 1
  93. cglobal int16_to_float_%1, 3, 3, 4, dst, src, len
  94. mov srcq, [srcq]
  95. mov dstq, [dstq]
  96. %ifidn %1, a
  97. test dstq, mmsize-1
  98. jne int16_to_float_u_int %+ SUFFIX
  99. test srcq, mmsize-1
  100. jne int16_to_float_u_int %+ SUFFIX
  101. %else
  102. int16_to_float_u_int %+ SUFFIX
  103. %endif
  104. add dstq, lenq
  105. shr lenq, 1
  106. add srcq, lenq
  107. neg lenq
  108. mova m3, [flt2pm31]
  109. .next:
  110. mov%1 m2, [srcq+lenq]
  111. pxor m0, m0
  112. pxor m1, m1
  113. punpcklwd m0, m2
  114. punpckhwd m1, m2
  115. cvtdq2ps m0, m0
  116. cvtdq2ps m1, m1
  117. mulps m0, m3
  118. mulps m1, m3
  119. mov%1 [ dstq+2*lenq], m0
  120. mov%1 [mmsize + dstq+2*lenq], m1
  121. add lenq, mmsize
  122. jl .next
  123. REP_RET
  124. %endmacro
  125. %macro FLOAT_TO_INT32 1
  126. cglobal float_to_int32_%1, 3, 3, 5, dst, src, len
  127. mov srcq, [srcq]
  128. mov dstq, [dstq]
  129. %ifidn %1, a
  130. test dstq, mmsize-1
  131. jne float_to_int32_u_int %+ SUFFIX
  132. test srcq, mmsize-1
  133. jne float_to_int32_u_int %+ SUFFIX
  134. %else
  135. float_to_int32_u_int %+ SUFFIX
  136. %endif
  137. add srcq, lenq
  138. add dstq, lenq
  139. neg lenq
  140. mova m2, [flt2p31]
  141. .next:
  142. mov%1 m0, [ srcq+lenq]
  143. mov%1 m1, [mmsize + srcq+lenq]
  144. mulps m0, m2
  145. mulps m1, m2
  146. cvtps2dq m3, m0
  147. cvtps2dq m4, m1
  148. cmpnltps m0, m2
  149. cmpnltps m1, m2
  150. paddd m0, m3
  151. paddd m1, m4
  152. mov%1 [ dstq+lenq], m0
  153. mov%1 [mmsize + dstq+lenq], m1
  154. add lenq, 2*mmsize
  155. jl .next
  156. REP_RET
  157. %endmacro
  158. %macro FLOAT_TO_INT16 1
  159. cglobal float_to_int16_%1, 3, 3, 3, dst, src, len
  160. mov srcq, [srcq]
  161. mov dstq, [dstq]
  162. %ifidn %1, a
  163. test dstq, mmsize-1
  164. jne float_to_int16_u_int %+ SUFFIX
  165. test srcq, mmsize-1
  166. jne float_to_int16_u_int %+ SUFFIX
  167. %else
  168. float_to_int16_u_int %+ SUFFIX
  169. %endif
  170. lea srcq, [srcq + 2*lenq]
  171. add dstq, lenq
  172. neg lenq
  173. mova m2, [flt2p15]
  174. .next:
  175. mov%1 m0, [ srcq+2*lenq]
  176. mov%1 m1, [mmsize + srcq+2*lenq]
  177. mulps m0, m2
  178. mulps m1, m2
  179. cvtps2dq m0, m0
  180. cvtps2dq m1, m1
  181. packssdw m0, m1
  182. mov%1 [ dstq+lenq], m0
  183. add lenq, mmsize
  184. jl .next
  185. REP_RET
  186. %endmacro
  187. %macro INT32_TO_INT16 1
  188. cglobal int32_to_int16_%1, 3, 3, 2, dst, src, len
  189. mov srcq, [srcq]
  190. mov dstq, [dstq]
  191. %ifidn %1, a
  192. test dstq, mmsize-1
  193. jne int32_to_int16_u_int %+ SUFFIX
  194. test srcq, mmsize-1
  195. jne int32_to_int16_u_int %+ SUFFIX
  196. %else
  197. int32_to_int16_u_int %+ SUFFIX
  198. %endif
  199. lea srcq, [srcq + 2*lenq]
  200. add dstq, lenq
  201. neg lenq
  202. .next:
  203. mov%1 m0, [ srcq+2*lenq]
  204. mov%1 m1, [mmsize + srcq+2*lenq]
  205. psrad m0, 16
  206. psrad m1, 16
  207. packssdw m0, m1
  208. mov%1 [ dstq+lenq], m0
  209. add lenq, mmsize
  210. jl .next
  211. REP_RET
  212. %endmacro
  213. INIT_MMX mmx
  214. INT16_TO_INT32 u
  215. INT16_TO_INT32 a
  216. INT32_TO_INT16 u
  217. INT32_TO_INT16 a
  218. INIT_XMM sse
  219. INT16_TO_INT32 u
  220. INT16_TO_INT32 a
  221. INT32_TO_INT16 u
  222. INT32_TO_INT16 a
  223. INIT_XMM sse2
  224. INT32_TO_FLOAT u
  225. INT32_TO_FLOAT a
  226. INT16_TO_FLOAT u
  227. INT16_TO_FLOAT a
  228. FLOAT_TO_INT32 u
  229. FLOAT_TO_INT32 a
  230. FLOAT_TO_INT16 u
  231. FLOAT_TO_INT16 a