You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

94 lines
3.2KB

  1. /*
  2. * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
  3. *
  4. * This file is part of Libav.
  5. *
  6. * Libav is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * Libav is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with Libav; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "libavutil/arm/asm.S"
  /*
   * ff_decode_hf_neon
   *
   * For each index from a start value up to (not including) an end value,
   * picks one 32-byte row of signed 8-bit values, converts its first eight
   * entries to single-precision floats, multiplies them by a per-index scale
   * and stores the eight results.
   *
   * Register use as read from the code below (the C prototype is not visible
   * in this file -- TODO confirm the roles against the caller):
   *   r0         output base; 8 floats (32 bytes) are stored per index, with
   *              a 128-bit alignment hint on the store
   *   r1         table of 32-bit words, one per index; each word is a row
   *              number (it is multiplied by 32 to form a byte offset)
   *   r2         base of the signed 8-bit row table
   *   r3         byte offset added to r2 before the loop
   *   stack +0   pointer to 8-byte entries, one per index; only the first
   *              32-bit word of each entry is used, read as a signed
   *              fixed-point number with 4 fraction bits
   *   stack +4   start index
   *   stack +8   end index
   * (stack offsets are relative to sp on entry, before the push below)
   *
   * Writes r0-r5, lr and NEON registers q0, q1 and d7; r4, r5 and the return
   * address are saved and restored by the push/pop pair.
   *
   * NOTE(review): the index is compared after it has been incremented, so
   * the loop body always runs at least once and only exits when the
   * incremented index equals the end value. Presumably callers guarantee
   * start < end -- confirm.
   */
  21. function ff_decode_hf_neon, export=1
  22. push {r4-r5,lr}
  /* r2 = row-table base plus the byte offset; r3 is free for reuse after this. */
  23. add r2, r2, r3
  /* The push stored 12 bytes, so the caller's stack words now begin at sp + 12. */
  24. ldr r3, [sp, #12]
  /* r4 = start index, r5 = end index (the next two stack words). */
  25. ldrd r4, r5, [sp, #16]
  /*
   * Advance the three per-index pointers to the start index:
   * 8 bytes per scale entry, 4 bytes per row number, 32 bytes per output row.
   */
  26. add r3, r3, r4, lsl #3
  27. add r1, r1, r4, lsl #2
  28. add r0, r0, r4, lsl #5
  /*
   * Main loop, one index per pass.
   * ldr_post is a macro that is not defined in this file; presumably it loads
   * the word at [r1] into lr and then adds 4 to r1 -- confirm in the included
   * asm.S.
   */
  29. 1: ldr_post lr, r1, #4
  30. add r4, r4, #1
  /* lr = address of the selected row: row number * 32 bytes past r2. */
  31. add lr, r2, lr, lsl #5
  /*
   * The flags set here are consumed by the bne at the bottom of the loop;
   * none of the NEON instructions in between is a flag-setting form.
   */
  32. cmp r4, r5
  /* d7 = the 8-byte scale entry (r3 advances by 8); d0 = eight signed bytes of the row. */
  33. vld1.32 {d7}, [r3]!
  34. vld1.8 {d0}, [lr,:64]
  /* Fixed-point (4 fraction bits) to float, i.e. the integer value divided by 16. */
  35. vcvt.f32.s32 d7, d7, #4
  /* Sign-extend the eight bytes: s8 -> s16 in q1, then s16 -> s32 in q0 (low four) and q1 (high four). */
  36. vmovl.s8 q1, d0
  37. vmovl.s16 q0, d2
  38. vmovl.s16 q1, d3
  39. vcvt.f32.s32 q0, q0
  40. vcvt.f32.s32 q1, q1
  /* Scale all eight floats by lane 0 of d7; lane 1 of the scale entry is not used. */
  41. vmul.f32 q0, q0, d7[0]
  42. vmul.f32 q1, q1, d7[0]
  /* Store the eight results and advance r0 by 32 bytes. */
  43. vst1.32 {q0-q1}, [r0,:128]!
  44. bne 1b
  /* Restore r4-r5 and return by popping the saved lr into pc. */
  45. pop {r4-r5,pc}
  46. endfunc
  /*
   * ff_dca_lfe_fir0_neon
   *
   * Entry point that runs the shared dca_lfe_fir body with a decimation
   * factor of 32. It only sets up the two loop parameters and jumps:
   *   r3 = 32        decifactor, used by the shared body as its outer-loop count
   *   r6 = 256/32    = 8, the number of coefficients accumulated per output
   * r0, r1 and r2 are passed through to the shared body unchanged.
   *
   * The registers pushed here (r4-r6, lr) are exactly the set that the shared
   * body pops on return, so this function does not return by itself; control
   * leaves through the pop at the end of dca_lfe_fir.
   */
  47. function ff_dca_lfe_fir0_neon, export=1
  48. push {r4-r6,lr}
  49. mov r3, #32 @ decifactor
  50. mov r6, #256/32
  /* Continue in the shared body, which is located inside ff_dca_lfe_fir1_neon. */
  51. b dca_lfe_fir
  52. endfunc
  /*
   * ff_dca_lfe_fir1_neon, and the shared body dca_lfe_fir
   *
   * ff_dca_lfe_fir1_neon sets a decimation factor of 64 (r3 = 64,
   * r6 = 256/64 = 4) and falls through into dca_lfe_fir. The label is also the
   * branch target of ff_dca_lfe_fir0_neon, which arrives with r3 = 32, r6 = 8
   * and the same registers pushed.
   *
   * Registers on entry to dca_lfe_fir:
   *   r0   out: receives one float per outer iteration (32-bit alignment hint)
   *   r1   in: address of the last input float that is read; the floats
   *        before it are read as well (r6 floats in total per output)
   *   r2   coefficient table, read forwards as cf0; loads carry a 128-bit
   *        alignment hint
   *   r3   decifactor = number of outer iterations
   *   r6   number of coefficients accumulated per output (a multiple of 4 for
   *        both entry points)
   *
   * Derived pointers:
   *   r4   out2 = out + decifactor floats; receives the second result of each
   *        outer iteration
   *   r5   cf1 = last four floats of a 256-float table at r2, read backwards
   *
   * Each outer iteration produces two sums over the same r6 input floats:
   *   v0 (q2)  input multiplied by the next r6 cf0 coefficients, with the
   *            coefficient order reversed relative to the input order
   *   v1 (q3)  input multiplied element by element with the cf1 blocks
   * and stores v0 to out and v1 to out2. Over all iterations r2 advances
   * and r5 retreats by decifactor * r6 = 256 floats.
   *
   * Writes r0-r6, r12, lr and NEON registers q1-q3 and q8-q10; r4-r6 and the
   * return address are restored by the final pop.
   */
  53. function ff_dca_lfe_fir1_neon, export=1
  54. push {r4-r6,lr}
  55. mov r3, #64 @ decifactor
  56. mov r6, #256/64
  57. dca_lfe_fir:
  58. add r4, r0, r3, lsl #2 @ out2
  59. add r5, r2, #256*4-16 @ cf1
  /*
   * Back r1 up by three floats so that a 16-byte load ends on the float r1
   * pointed to on entry; lr = -16 is the post-index step used to walk r1 and
   * r5 backwards one 4-float block at a time.
   */
  60. sub r1, r1, #12
  61. mov lr, #-16
  /* Outer loop: one pass per output pair. Clear both accumulators and reload the coefficient count. */
  62. 1:
  63. vmov.f32 q2, #0.0 @ v0
  64. vmov.f32 q3, #0.0 @ v1
  65. mov r12, r6
  /* Inner loop: four coefficients per pass; r12 counts the coefficients still to do. */
  66. 2:
  67. vld1.32 {q8}, [r2,:128]! @ cf0
  68. vld1.32 {q9}, [r5,:128], lr @ cf1
  69. vld1.32 {q1}, [r1], lr @ in
  /* Flags from this subs feed the bne below; the NEON instructions in between do not set flags. */
  70. subs r12, r12, #4
  /*
   * vrev64 swaps the two floats inside each 64-bit half of cf0. Pairing the
   * low input half (d2) with the swapped high half (d21) and the high input
   * half (d3) with the swapped low half (d20) multiplies the input block by
   * the cf0 block in fully reversed order. v1 uses the cf1 block as loaded.
   */
  71. vrev64.32 q10, q8
  72. vmla.f32 q3, q1, q9
  73. vmla.f32 d4, d2, d21
  74. vmla.f32 d5, d3, d20
  75. bne 2b
  /*
   * The inner loop moved r1 back by 16 bytes per pass, r6 * 4 bytes in total;
   * undo that so the next output reads the same input floats.
   */
  76. add r1, r1, r6, lsl #2
  /* Flags from this subs feed the final bne; the reduction and stores below do not set flags. */
  77. subs r3, r3, #1
  /* Horizontal sums: fold each accumulator to two lanes, then pairwise-add so d5[0] = v0 and d5[1] = v1. */
  78. vadd.f32 d4, d4, d5
  79. vadd.f32 d6, d6, d7
  80. vpadd.f32 d5, d4, d6
  /* Store v0 to out and v1 to out2; each pointer advances by one float. */
  81. vst1.32 {d5[0]}, [r0,:32]!
  82. vst1.32 {d5[1]}, [r4,:32]!
  83. bne 1b
  /* Restore r4-r6 and return by popping the saved lr into pc. */
  84. pop {r4-r6,pc}
  85. endfunc