You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

155 lines
5.0KB

  1. /*
  2. * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "libavutil/arm/asm.S"
  @ ----------------------------------------------------------------------
  @ ff_ac3_max_msb_abs_int16_neon
  @
  @ Scans a buffer of signed 16-bit values and returns a value whose
  @ highest set bit equals the highest set bit of any |element|.
  @
  @ In:    r0 = source pointer (loads carry a :128 alignment hint)
  @        r1 = element count; 16 elements are consumed per pass and the
  @             loop body always executes at least once
  @ Out:   r0 = largest of the four per-lane OR accumulators, zero-extended
  @ Clobb: r1, q0-q3, flags
  @ ----------------------------------------------------------------------
  function ff_ac3_max_msb_abs_int16_neon, export=1
          vmov.i16        q0,  #0                 @ OR accumulator A
          vmov.i16        q2,  #0                 @ OR accumulator B
  1:
          vld1.16         {q1},     [r0,:128]!    @ first 8 elements
          vabs.s16        q1,  q1
          vld1.16         {q3},     [r0,:128]!    @ next 8 elements
          vabs.s16        q3,  q3
          vorr            q0,  q0,  q1            @ A |= abs(first half)
          vorr            q2,  q2,  q3            @ B |= abs(second half)
          subs            r1,  r1,  #16
          bgt             1b

          @ Fold 16 lanes down to one.  The first two steps OR lanes
          @ together; the last two take an unsigned pairwise maximum, which
          @ keeps the top set bit intact but does not merge the lower bits.
          vorr            q0,  q0,  q2
          vorr            d0,  d0,  d1
          vpmax.u16       d0,  d0,  d0
          vpmax.u16       d0,  d0,  d0
          vmov.u16        r0,  d0[0]              @ zero-extending lane read
          bx              lr
  endfunc
  @ ----------------------------------------------------------------------
  @ ff_ac3_exponent_min_neon
  @
  @ For each group of 16 bytes in the row at r0, overwrites the group with
  @ the unsigned minimum of itself and the same group in each of the r1
  @ rows that follow.  Consecutive rows are 256 bytes apart.
  @ NOTE(review): 256 is presumably the per-block coefficient capacity of
  @ the exponent array - confirm against the C caller.
  @
  @ In:    r0 = first row (updated in place; :128 alignment hint)
  @        r1 = number of following rows to fold in; 0 returns immediately
  @        r2 = bytes to process per row, consumed 16 per outer pass; the
  @             outer loop body always executes at least once
  @ Clobb: r0, r2, r3, r12, q0, q1, flags (lr is saved and restored)
  @ ----------------------------------------------------------------------
  function ff_ac3_exponent_min_neon, export=1
          cmp             r1,  #0
          it              eq
          bxeq            lr                      @ nothing to fold in
          push            {lr}                    @ lr doubles as row counter
          mov             r12, #256               @ row stride in bytes
  1:
          vld1.8          {q0},     [r0,:128]     @ running minimum
          mov             lr,  r1                 @ rows left for this group
          add             r3,  r0,  #256          @ same group, next row
  2:
          vld1.8          {q1},     [r3,:128], r12
          subs            lr,  lr,  #1
          vmin.u8         q0,  q0,  q1            @ NEON op: leaves flags alone
          bgt             2b
          subs            r2,  r2,  #16
          vst1.8          {q0},     [r0,:128]!    @ write back, next group
          bgt             1b
          pop             {pc}                    @ return through saved lr
  endfunc
  @ ----------------------------------------------------------------------
  @ ff_ac3_lshift_int16_neon
  @
  @ Shifts every signed 16-bit element of a buffer by a per-call amount,
  @ in place.  The shift is the register form of VSHL, so the count is
  @ taken from the broadcast value in each lane.
  @
  @ In:    r0 = buffer (read and written; :128 alignment hint)
  @        r1 = element count; 8 elements per pass, body runs at least once
  @        r2 = shift count, broadcast to all eight 16-bit lanes
  @ Clobb: r0, r1, q0, q1, flags
  @ ----------------------------------------------------------------------
  function ff_ac3_lshift_int16_neon, export=1
          vdup.16         q0,  r2                 @ per-lane shift counts
  1:
          vld1.16         {q1},     [r0,:128]
          vshl.s16        q1,  q1,  q0
          vst1.16         {q1},     [r0,:128]!    @ store, then advance
          subs            r1,  r1,  #8
          bgt             1b
          bx              lr
  endfunc
  @ ----------------------------------------------------------------------
  @ ff_ac3_rshift_int32_neon
  @
  @ Shifts every signed 32-bit element of a buffer right, in place.  The
  @ count is negated first and then used with the register form of VSHL,
  @ where a negative count means a right shift.
  @
  @ In:    r0 = buffer (read and written; :128 alignment hint)
  @        r1 = element count; 4 elements per pass, body runs at least once
  @        r2 = right-shift count
  @ Clobb: r0, r1, r2, q0, q1, flags
  @ ----------------------------------------------------------------------
  function ff_ac3_rshift_int32_neon, export=1
          rsb             r2,  r2,  #0            @ r2 = -shift
          vdup.32         q0,  r2                 @ per-lane shift counts
  1:
          vld1.32         {q1},     [r0,:128]
          vshl.s32        q1,  q1,  q0            @ negative count: shift right
          vst1.32         {q1},     [r0,:128]!    @ store, then advance
          subs            r1,  r1,  #4
          bgt             1b
          bx              lr
  endfunc
  @ ----------------------------------------------------------------------
  @ ff_float_to_fixed24_neon
  @
  @ Converts single-precision floats to signed 32-bit fixed point with 24
  @ fractional bits (VCVT with an immediate of 24).
  @
  @ In:    r0 = destination, 32-bit integers (:128 alignment hint)
  @        r1 = source, 32-bit floats (:128 alignment hint)
  @        r2 = element count; 16 elements per pass, body runs at least once
  @ Clobb: r0, r1, r2, q0-q3, flags
  @ ----------------------------------------------------------------------
  function ff_float_to_fixed24_neon, export=1
  1:
          @ Loads, conversions and stores are interleaved; keep this order.
          vld1.32         {q0-q1},  [r1,:128]!
          vcvt.s32.f32    q0,  q0,  #24
          vld1.32         {q2-q3},  [r1,:128]!
          vcvt.s32.f32    q1,  q1,  #24
          vcvt.s32.f32    q2,  q2,  #24
          vst1.32         {q0-q1},  [r0,:128]!
          vcvt.s32.f32    q3,  q3,  #24
          vst1.32         {q2-q3},  [r0,:128]!
          subs            r2,  r2,  #16
          bgt             1b
          bx              lr
  endfunc
  @ ----------------------------------------------------------------------
  @ ff_ac3_extract_exponents_neon
  @
  @ Writes one byte per 32-bit input element, computed as
  @ clz(abs(x)) - 8 and truncated to 8 bits.
  @ NOTE(review): the bias of 8 presumably reflects inputs that occupy at
  @ most 24 significant bits - confirm against the C reference.
  @
  @ In:    r0 = destination bytes (4 stored per pass; :32 alignment hint)
  @        r1 = source 32-bit integers (:128 alignment hint)
  @        r2 = element count; 4 elements per pass, body runs at least once
  @ Clobb: r0, r1, r2, q0, q1, q3, q15, flags
  @ ----------------------------------------------------------------------
  function ff_ac3_extract_exponents_neon, export=1
          vmov.i32        q15, #8                 @ bias subtracted from clz
  1:
          vld1.32         {q0},     [r1,:128]!
          vabs.s32        q1,  q0
          vclz.i32        q3,  q1
          vsub.i32        q3,  q3,  q15
          vmovn.i32       d6,  q3                 @ 4 x 32-bit -> 4 x 16-bit
          vmovn.i16       d6,  q3                 @ low 4 bytes hold results
          vst1.32         {d6[0]},  [r0,:32]!     @ emit those 4 bytes
          subs            r2,  r2,  #4
          bgt             1b
          bx              lr
  endfunc
  @ ----------------------------------------------------------------------
  @ ff_ac3_sum_square_butterfly_int32_neon
  @
  @ Accumulates four 64-bit sums of squares over two 32-bit integer
  @ vectors a (at r1) and b (at r2) and stores them to r0 in this order:
  @   [0] sum of a*a
  @   [1] sum of b*b
  @   [2] sum of (a+b)*(a+b)
  @   [3] sum of (a-b)*(a-b)
  @ a+b and a-b are formed with 32-bit adds and subtracts before the
  @ widening multiply-accumulate.
  @
  @ In:    r0 = output, four 64-bit values (no alignment hint)
  @        r1 = first input vector  (no alignment hint)
  @        r2 = second input vector (no alignment hint)
  @        r3 = element count; 2 elements per pass, body runs at least once
  @ Clobb: r1, r2, r3, q0-q3, q8, q9, flags
  @ ----------------------------------------------------------------------
  function ff_ac3_sum_square_butterfly_int32_neon, export=1
          vmov.i64        q0,  #0                 @ sum a*a      (2 partials)
          vmov.i64        q1,  #0                 @ sum b*b
          vmov.i64        q2,  #0                 @ sum (a+b)^2
          vmov.i64        q3,  #0                 @ sum (a-b)^2
  1:
          vld1.32         {d16},    [r1]!         @ two elements of a
          vld1.32         {d17},    [r2]!         @ two elements of b
          vadd.s32        d18, d16, d17           @ a + b
          vsub.s32        d19, d16, d17           @ a - b
          vmlal.s32       q0,  d16, d16
          vmlal.s32       q1,  d17, d17
          vmlal.s32       q2,  d18, d18
          vmlal.s32       q3,  d19, d19
          subs            r3,  r3,  #2
          bgt             1b

          @ Combine the two partial sums of each accumulator and pack the
          @ four totals into d0-d3.  Each step reads registers that have
          @ not yet been overwritten, so the order matters.
          vadd.s64        d0,  d0,  d1
          vadd.s64        d1,  d2,  d3
          vadd.s64        d2,  d4,  d5
          vadd.s64        d3,  d6,  d7
          vst1.64         {q0-q1},  [r0]
          bx              lr
  endfunc
  @ ----------------------------------------------------------------------
  @ ff_ac3_sum_square_butterfly_float_neon
  @
  @ Accumulates four single-precision sums of squares over two float
  @ vectors a (at r1) and b (at r2) and stores them to r0 in this order:
  @   [0] sum of a*a
  @   [1] sum of b*b
  @   [2] sum of (a+b)*(a+b)
  @   [3] sum of (a-b)*(a-b)
  @
  @ In:    r0 = output, four 32-bit floats (no alignment hint)
  @        r1 = first input vector  (no alignment hint)
  @        r2 = second input vector (no alignment hint)
  @        r3 = element count; 2 elements per pass, body runs at least once
  @ Clobb: r1, r2, r3, q0, q1, q8, q9, flags
  @ ----------------------------------------------------------------------
  function ff_ac3_sum_square_butterfly_float_neon, export=1
          vmov.f32        q0,  #0.0               @ d0: a*a      d1: b*b
          vmov.f32        q1,  #0.0               @ d2: (a+b)^2  d3: (a-b)^2
  1:
          vld1.32         {d16},    [r1]!         @ two elements of a
          vld1.32         {d17},    [r2]!         @ two elements of b
          vadd.f32        d18, d16, d17           @ a + b
          vsub.f32        d19, d16, d17           @ a - b
          vmla.f32        d0,  d16, d16
          vmla.f32        d1,  d17, d17
          vmla.f32        d2,  d18, d18
          vmla.f32        d3,  d19, d19
          subs            r3,  r3,  #2
          bgt             1b

          @ Each accumulator holds two partial sums; the pairwise adds
          @ collapse them and pack the four totals into q0.
          vpadd.f32       d0,  d0,  d1
          vpadd.f32       d1,  d2,  d3
          vst1.32         {q0},     [r0]
          bx              lr
  endfunc