You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

89 lines
3.0KB

  1. /*
  2. * ARM NEON optimised Format Conversion Utils
  3. * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  4. * Copyright (c) 2015 Janne Grunau <janne-libav@jannau.net>
  5. *
  6. * This file is part of Libav.
  7. *
  8. * Libav is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * Libav is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with Libav; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. #include "config.h"
  23. #include "libavutil/arm/asm.S"
  /*
   * ff_int32_to_float_fmul_scalar_neon
   *
   * Converts signed 32-bit integers to single-precision floats and scales
   * every converted value by one common factor.
   *
   * Register interface, as used by the code below:
   *   r0      output pointer; stored through with a :128 qualifier, so it
   *           has to be 16-byte aligned
   *   r1      input pointer; loaded through with a :128 qualifier, so it
   *           has to be 16-byte aligned
   *   factor  lane 0 of d0 on the VFP lines, r2 on the NOVFP lines
   *   count   r2 on the VFP lines, r3 on the NOVFP lines
   *
   * VFP and NOVFP are line prefixes defined outside this file; presumably
   * they select between hard-float and soft-float argument passing
   * (TODO confirm in libavutil/arm/asm.S).
   *
   * The count is in elements. Eight elements are handled per pass and the
   * loop is left only when the count reaches exactly zero, so the count
   * has to be a positive multiple of 8.
   *
   * Presumably called from C as
   *   void f(float *dst, const int32_t *src, float mul, int len)
   * (TODO confirm against the C declaration).
   *
   * Writes q0-q3, q8-q10, the count register and the flags.
   */
  function ff_int32_to_float_fmul_scalar_neon, export=1
  VFP     vdup.32         q0,  d0[0]              @ factor -> all four lanes of q0
  VFP     len     .req    r2
  NOVFP   vdup.32         q0,  r2                 @ factor -> all four lanes of q0
  NOVFP   len     .req    r3
          dst     .req    r0
          src     .req    r1

          @ Prologue: fetch and convert the first eight inputs so that the
          @ loop can overlap the multiply of one batch with the load of the next.
          vld1.32         {q1},  [src,:128]!
          vcvt.f32.s32    q3,  q1
          vld1.32         {q2},  [src,:128]!
          vcvt.f32.s32    q8,  q2

  1:      subs            len, len, #8            @ eight elements consumed; Z set on the last batch
          pld             [src, #16]
          vmul.f32        q9,  q3,  q0
          vmul.f32        q10, q8,  q0
          beq             2f                      @ last batch: skip the look-ahead load
          vld1.32         {q1},  [src,:128]!      @ look ahead: next eight inputs
          vcvt.f32.s32    q3,  q1
          vld1.32         {q2},  [src,:128]!
          vcvt.f32.s32    q8,  q2
          vst1.32         {q9},  [dst,:128]!      @ store the batch multiplied above
          vst1.32         {q10}, [dst,:128]!
          b               1b

  2:      vst1.32         {q9},  [dst,:128]!      @ store the final batch
          vst1.32         {q10}, [dst,:128]!
          bx              lr

          .unreq  len
          .unreq  dst
          .unreq  src
  endfunc
  /*
   * ff_int32_to_float_fmul_array8_neon
   *
   * Converts signed 32-bit integers to single-precision floats in blocks
   * of eight elements. Each block is scaled by its own factor, read
   * sequentially from an array of floats.
   *
   * Register interface, as used by the code below:
   *   r0      incoming value is overwritten without being read
   *   r1      output pointer; stored through with a :128 qualifier, so it
   *           has to be 16-byte aligned
   *   r2      input pointer; loaded through with a :128 qualifier, so it
   *           has to be 16-byte aligned
   *   r3      pointer to the per-block factors (no alignment qualifier)
   *   [sp]    element count, read from the top of the stack on entry
   *
   * The count is shifted right by three, so any remainder modulo 8 is
   * ignored. The main loop handles two blocks per pass and a tail handles
   * a final single block. A count below 8 is not supported: it would enter
   * the two-block loop.
   *
   * Presumably called from C with a context pointer as the unused first
   * argument (TODO confirm against the C declaration).
   *
   * Writes r0, q0-q3, q8-q10 and the flags.
   */
  function ff_int32_to_float_fmul_array8_neon, export=1
          blocks  .req    r0
          dst     .req    r1
          src     .req    r2
          scales  .req    r3

          ldr             blocks, [sp]            @ element count
          lsr             blocks, blocks, #3      @ number of eight-element blocks
          subs            blocks, blocks, #1      @ blocks left after the tail block
          beq             2f                      @ exactly one block: tail only

          @ Main loop: two blocks (sixteen elements, two factors) per pass.
  1:
          vld1.32         {q0-q1},   [src,:128]!  @ first block
          vld1.32         {q2-q3},   [src,:128]!  @ second block
          vld1.32         {d20},     [scales]!    @ two factors, one per block
          subs            blocks, blocks, #2      @ NEON ops below leave these flags alone
          vcvt.f32.s32    q0,  q0
          vcvt.f32.s32    q1,  q1
          vdup.32         q8,  d20[0]             @ factor for the first block
          vcvt.f32.s32    q2,  q2
          vcvt.f32.s32    q3,  q3
          vmul.f32        q0,  q0,  q8
          vdup.32         q9,  d20[1]             @ factor for the second block
          vmul.f32        q1,  q1,  q8
          vmul.f32        q2,  q2,  q9
          vmul.f32        q3,  q3,  q9
          vst1.32         {q0-q1},   [dst,:128]!
          vst1.32         {q2-q3},   [dst,:128]!
          bgt             1b                      @ more than one block still pending
          it              lt
          bxlt            lr                      @ nothing pending: done
                                                  @ equal: one block pending, fall through

          @ Tail: one final block with a single factor.
  2:
          vld1.32         {q0-q1},   [src,:128]
          vld1.32         {d16[],d17[]}, [scales] @ one factor replicated across q8
          vcvt.f32.s32    q0,  q0
          vcvt.f32.s32    q1,  q1
          vmul.f32        q0,  q0,  q8
          vmul.f32        q1,  q1,  q8
          vst1.32         {q0-q1},   [dst,:128]
          bx              lr

          .unreq  blocks
          .unreq  dst
          .unreq  src
          .unreq  scales
  endfunc