You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

84 lines
3.0KB

/*
 * ARM NEON optimised DSP functions
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
  21. #include "libavutil/arm/asm.S"
  /*
   * ff_vorbis_inverse_coupling_neon
   *
   * In-place update of two float arrays, four lanes per vector.
   *
   * In:    r0 = first array  (read and written)
   *        r1 = second array (read and written)
   *        r2 = element count
   *        NOTE(review): r0/r1 presumably correspond to the mag/ang arguments
   *        of the C vorbis_inverse_coupling prototype - confirm at the caller.
   * Out:   nothing in registers; both arrays are rewritten.
   * Clobb: r0-r3, r12, q0-q3, q8-q12, flags.
   *
   * Requirements visible from the code:
   *   - every load/store carries the :128 qualifier, so both pointers must be
   *     16-byte aligned;
   *   - only whole 4-float vectors are moved, so the count is expected to be
   *     a multiple of 4;
   *   - a count below 4 returns immediately without touching memory (without
   *     this guard the pipelined prologue would load and store 8 floats on
   *     each array before the first loop-exit test).
   *
   * Per lane, with x = element from r0 and y = element from r1:
   *     s     = x & 0x80000000        sign bit of x
   *     y2    = y ^ s                 y with its sign flipped when x has the
   *                                   sign bit set
   *     le    = (int32)y <= 0         signed integer compare on the raw bits
   *     r0[i] = x + (le ? y2 : 0)
   *     r1[i] = x - (le ? 0  : y2)
   *
   * The loop is software pipelined: r0/r1 are the load pointers, r3/r12 the
   * store pointers trailing one vector behind. Results alternate between
   * q12/q11 and q1/q0 so that the stores of one vector sit between the
   * arithmetic of the next.
   *
   * r2 bookkeeping: after the initial subtract r2 = count - 4. Each pass
   * through label 1 subtracts 8. At the "ble 2f" test, r2 < 0 means the
   * vector in q1/q0 is the last one, r2 == 0 means exactly one more vector
   * remains and is handled by the non-pipelined tail at label 3.
   */
  function ff_vorbis_inverse_coupling_neon, export=1
          vmov.i32        q10, #1<<31             @ q10 = sign-bit mask in every lane
          subs            r2,  r2,  #4            @ r2 = count - 4
          it              lt
          bxlt            lr                      @ count < 4: nothing to process
          mov             r3,  r0                 @ r3  = store pointer for first array
          mov             r12, r1                 @ r12 = store pointer for second array
          beq             3f                      @ exactly one vector: tail only

          @ Pipeline prologue: load and compute vector A into q12/q11.
          vld1.32         {d24-d25},[r1,:128]!    @ q12 = y
          vld1.32         {d22-d23},[r0,:128]!    @ q11 = x
          vcle.s32        q8,  q12, #0            @ q8  = le mask
          vand            q9,  q11, q10           @ q9  = sign bit of x
          veor            q12, q12, q9            @ q12 = y2
          vand            q2,  q12, q8            @ q2  = le ? y2 : 0
          vbic            q3,  q12, q8            @ q3  = le ? 0 : y2
          vadd.f32        q12, q11, q2            @ q12 = new first-array value
          vsub.f32        q11, q11, q3            @ q11 = new second-array value

          @ First half: compute vector B into q1/q0 while storing q12/q11.
  1:      vld1.32         {d2-d3},  [r1,:128]!    @ q1 = y
          vld1.32         {d0-d1},  [r0,:128]!    @ q0 = x
          vcle.s32        q8,  q1,  #0
          vand            q9,  q0,  q10
          veor            q1,  q1,  q9
          vst1.32         {d24-d25},[r3, :128]!   @ store previous result, first array
          vst1.32         {d22-d23},[r12,:128]!   @ store previous result, second array
          vand            q2,  q1,  q8
          vbic            q3,  q1,  q8
          vadd.f32        q1,  q0,  q2
          vsub.f32        q0,  q0,  q3
          subs            r2,  r2,  #8            @ two vectors accounted for
          ble             2f                      @ at most one vector left to load

          @ Second half: compute next vector into q12/q11 while storing q1/q0.
          vld1.32         {d24-d25},[r1,:128]!
          vld1.32         {d22-d23},[r0,:128]!
          vcle.s32        q8,  q12, #0
          vand            q9,  q11, q10
          veor            q12, q12, q9
          vst1.32         {d2-d3},  [r3, :128]!
          vst1.32         {d0-d1},  [r12,:128]!
          vand            q2,  q12, q8
          vbic            q3,  q12, q8
          vadd.f32        q12, q11, q2
          vsub.f32        q11, q11, q3
          b               1b

          @ Drain: store the vector still held in q1/q0. The vector stores do
          @ not alter the flags, so lt/eq below still come from the subs.
  2:      vst1.32         {d2-d3},  [r3, :128]!
          vst1.32         {d0-d1},  [r12,:128]!
          it              lt
          bxlt            lr                      @ r2 < 0: all vectors done

          @ Tail: one remaining vector, processed without pipelining. Here the
          @ load pointers r0/r1 equal the store pointers r3/r12, so the result
          @ is written back through r0/r1.
  3:      vld1.32         {d2-d3},  [r1,:128]
          vld1.32         {d0-d1},  [r0,:128]
          vcle.s32        q8,  q1,  #0
          vand            q9,  q0,  q10
          veor            q1,  q1,  q9
          vand            q2,  q1,  q8
          vbic            q3,  q1,  q8
          vadd.f32        q1,  q0,  q2
          vsub.f32        q0,  q0,  q3
          vst1.32         {d2-d3},  [r0,:128]!
          vst1.32         {d0-d1},  [r1,:128]!
          bx              lr
  endfunc