You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

115 lines
3.7KB

  1. /*
  2. * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  3. *
  4. * This file is part of Libav.
  5. *
  6. * Libav is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * Libav is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with Libav; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "asm.S"
  /*
   * ff_ac3_max_msb_abs_int16_neon
   *
   * In:   r0 = pointer to signed 16-bit elements; the :128 hint on the
   *            loads requires 16-byte alignment
   *       r1 = element count; 16 elements are consumed per pass and the
   *            loop body always runs at least once
   * Out:  r0 = unsigned 16-bit value whose highest set bit equals that of
   *            the largest absolute value seen.  Absolute values are ORed
   *            together, so this is not necessarily the exact maximum.
   * Clobbers: r1, q0-q3, flags
   */
  function ff_ac3_max_msb_abs_int16_neon, export=1
          vmov.i16        q0,  #0                 @ OR accumulator, first half of each pass
          vmov.i16        q2,  #0                 @ OR accumulator, second half of each pass
  1:
          vld1.16         {d2-d3},  [r0,:128]!    @ q1 = next 8 elements
          vabs.s16        q1,  q1
          vld1.16         {d6-d7},  [r0,:128]!    @ q3 = following 8 elements
          vabs.s16        q3,  q3
          vorr            q0,  q0,  q1
          vorr            q2,  q2,  q3
          subs            r1,  r1,  #16
          bgt             1b

          @ Fold 16 lanes down to one: OR 16 -> 8 -> 4 lanes, then two
          @ pairwise unsigned maxima leave the result in lane 0.
          vorr            q0,  q0,  q2
          vorr            d0,  d0,  d1
          vpmax.u16       d0,  d0,  d0
          vpmax.u16       d0,  d0,  d0
          vmov.u16        r0,  d0[0]
          bx              lr
  endfunc
  /*
   * ff_ac3_exponent_min_neon
   *
   * Replaces each byte of the row at r0 with the unsigned minimum of itself
   * and the bytes at the same offset in the r1 rows that follow it, where
   * consecutive rows are 256 bytes apart.
   *
   * In:   r0 = pointer to the first row (16-byte aligned, updated in place)
   *       r1 = number of following rows to fold in; returns at once if 0
   *       r2 = bytes per row to process; 16 bytes are handled per pass
   * Clobbers: r0, r2, r3, r12, q0, q1, flags (lr is saved and restored)
   */
  function ff_ac3_exponent_min_neon, export=1
          cmp             r1,  #0
          bxeq            lr                      @ nothing to compare against
          push            {lr}                    @ lr doubles as the inner counter
          mov             r12, #256               @ row stride in bytes
  1:
          vld1.8          {d0-d1},  [r0,:128]     @ q0 = running minimum, seeded from row 0
          mov             lr,  r1                 @ rows left for this column group
          add             r3,  r0,  #256          @ r3 -> same columns in row 1
  2:
          vld1.8          {d2-d3},  [r3,:128], r12        @ q1 = next row, advance by stride
          subs            lr,  lr,  #1
          vmin.u8         q0,  q0,  q1
          bgt             2b

          subs            r2,  r2,  #16
          vst1.8          {d0-d1},  [r0,:128]!    @ store does not touch the flags set by subs
          bgt             1b
          pop             {pc}                    @ return via the saved lr
  endfunc
  /*
   * ff_ac3_lshift_int16_neon
   *
   * Shifts 16-bit elements in place with VSHL (register form), using the
   * value in r2 as the per-lane shift count.
   *
   * In:   r0 = pointer to 16-bit elements (16-byte aligned, updated in place)
   *       r1 = element count; 8 elements per pass, at least one pass runs
   *       r2 = shift count, broadcast to every lane
   * Clobbers: r0, r1, q0, q1, flags
   */
  function ff_ac3_lshift_int16_neon, export=1
          vdup.16         q0,  r2                 @ q0 = shift count in all 8 lanes
  1:
          vld1.16         {d2-d3},  [r0,:128]     @ load without advancing r0 ...
          vshl.s16        q1,  q1,  q0
          vst1.16         {d2-d3},  [r0,:128]!    @ ... write back to the same spot, then advance
          subs            r1,  r1,  #8
          bgt             1b
          bx              lr
  endfunc
  /*
   * ff_ac3_rshift_int32_neon
   *
   * Shifts signed 32-bit elements in place.  The count in r2 is negated and
   * fed to VSHL (register form), which shifts right when given a negative
   * count.
   *
   * In:   r0 = pointer to 32-bit elements (16-byte aligned, updated in place)
   *       r1 = element count; 4 elements per pass, at least one pass runs
   *       r2 = right-shift amount
   * Clobbers: r0, r1, r2, q0, q1, flags
   */
  function ff_ac3_rshift_int32_neon, export=1
          rsb             r2,  r2,  #0            @ r2 = -shift
          vdup.32         q0,  r2                 @ q0 = negated count in all 4 lanes
  1:
          vld1.32         {d2-d3},  [r0,:128]     @ load without advancing r0 ...
          vshl.s32        q1,  q1,  q0
          vst1.32         {d2-d3},  [r0,:128]!    @ ... write back to the same spot, then advance
          subs            r1,  r1,  #4
          bgt             1b
          bx              lr
  endfunc
  /*
   * ff_float_to_fixed24_neon
   *
   * Converts single-precision floats to signed 32-bit fixed point with
   * 24 fractional bits (VCVT with #24).
   *
   * In:   r0 = destination pointer, 32-bit integers (16-byte aligned)
   *       r1 = source pointer, 32-bit floats (16-byte aligned)
   *       r2 = element count; 16 elements per pass, at least one pass runs
   * Clobbers: r0, r1, r2, q0-q3, flags
   */
  function ff_float_to_fixed24_neon, export=1
  1:
          vld1.32         {d0-d3},  [r1,:128]!    @ q0, q1 = floats 0..7
          vcvt.s32.f32    q0,  q0,  #24
          vld1.32         {d4-d7},  [r1,:128]!    @ q2, q3 = floats 8..15
          vcvt.s32.f32    q1,  q1,  #24
          vcvt.s32.f32    q2,  q2,  #24
          vst1.32         {d0-d3},  [r0,:128]!
          vcvt.s32.f32    q3,  q3,  #24
          vst1.32         {d4-d7},  [r0,:128]!
          subs            r2,  r2,  #16
          bgt             1b
          bx              lr
  endfunc
  /*
   * ff_ac3_extract_exponents_neon
   *
   * For each signed 32-bit input value, writes one byte equal to
   * clz(abs(value)) - 8.  Where that result is >= 24 the byte is set to 24
   * and the input value is cleared to zero; the (possibly cleared) inputs
   * are stored back over the originals.
   *
   * In:   r0 = destination pointer for the byte results (4-byte aligned)
   *       r1 = pointer to 32-bit values (16-byte aligned, rewritten in place)
   *       r2 = element count; 4 elements per pass, at least one pass runs
   * Clobbers: r0, r1, r2, q0-q3, q14, q15, flags
   */
  function ff_ac3_extract_exponents_neon, export=1
          vmov.i32        q14, #24                @ upper limit for the result
          vmov.i32        q15, #8                 @ bias subtracted from the leading-zero count
  1:
          vld1.32         {d0-d1},  [r1,:128]     @ q0 = 4 input values (r1 not advanced yet)
          vabs.s32        q1,  q0
          vclz.i32        q3,  q1
          vsub.i32        q3,  q3,  q15           @ q3 = clz - 8
          vcge.s32        q2,  q3,  q14           @ q2 = all-ones mask where q3 >= 24
          vbit            q3,  q14, q2            @ masked lanes: result = 24
          vbic            q0,  q0,  q2            @ masked lanes: input value = 0
          vmovn.i32       d6,  q3                 @ narrow 32 -> 16 bits into d6
          vmovn.i16       d6,  q3                 @ narrow 16 -> 8 bits; low 4 bytes of d6 are the results
          vst1.32         {d0-d1},  [r1,:128]!    @ write inputs back, then advance
          vst1.32         {d6[0]},  [r0,:32]!     @ store the 4 result bytes
          subs            r2,  r2,  #4
          bgt             1b
          bx              lr
  endfunc