You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

206 lines
6.2KB

  1. /*
  2. * Copyright (c) 2013 RISC OS Open Ltd
  3. * Author: Ben Avison <bavison@riscosopen.org>
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/arm/asm.S"
  /*
   * Integer register roles shared by ff_imdct_half_vfp and its two helper
   * macros. a1-a3 hold the three C arguments on entry; v1-v5 are pushed on
   * entry to the function and popped again on exit.
   */
CONTEXT  .req   a1      @ FFTContext *s
ORIGOUT  .req   a2      @ FFTSample *output, as passed by the caller
IN       .req   a3      @ const FFTSample *input
OUT      .req   v1      @ working copy of ORIGOUT
REVTAB   .req   v2      @ pointer loaded from CONTEXT + 2*4
TCOS     .req   v3      @ pointer loaded from CONTEXT + 6*4
TSIN     .req   v4      @ pointer loaded from CONTEXT + 7*4
OLDFPSCR .req   v5      @ FPSCR value read on entry, written back before returning

  /*
   * Four destination pointers computed in prerotation_innerloop.
   * J0 overlaps ORIGOUT (both are a2) and J3 is lr, which is why the
   * function copies ORIGOUT into OUT and keeps its return address on the
   * stack.
   */
J0       .req   a2
J1       .req   a4
J2       .req   ip
J3       .req   lr
  /*
   * prerotation_innerloop
   *
   * One unrolled step of the rotation applied before the FFT. Each
   * expansion produces four 8-byte output elements: trig indices k and k+1
   * (the "lo" pair) and n4-k-2 and n4-k-1 (the "hi" pair). It then advances
   * the assembly-time counter k by 2.
   *
   * Relies on the length-4, stride-1 short-vector mode that
   * ff_imdct_half_vfp programs into FPSCR, so every instruction marked
   * "vector operation" works on four consecutive single registers.
   *
   * Per lane, with a = s0..s3, b = s4..s7, c = TCOS value, s = TSIN value:
   *     s8..s11  = a*c - b*s    stored at offset 0 of the output element
   *     s12..s15 = a*s + b*c    stored at offset 4 of the output element
   * The output element for a lane lives at OUT + 8*j, where j is the
   * REVTAB halfword belonging to that lane's trig index.
   *
   * Needs the symbols k and n4 to be .set by the caller.
   * Writes s0-s15, d8-d11 (s16-s23) and J0-J3.
   * The instruction order is hand-interleaved (loads between the vector
   * operations); keep it as is.
   */
.macro prerotation_innerloop
 .set trig_lo, k
 .set trig_hi, n4 - k - 2
 .set in_lo, trig_lo * 2
 .set in_hi, trig_hi * 2
        vldr    d8, [TCOS, #trig_lo*4]          @ s16,s17
        vldr    d9, [TCOS, #trig_hi*4]          @ s18,s19
        @ a operands: input is walked downwards for the lo lanes, upwards for the hi lanes
        vldr    s0, [IN, #(in_hi + 3)*4]
        vldr    s1, [IN, #(in_hi + 1)*4]
        vldr    s2, [IN, #(in_lo + 3)*4]
        vldr    s3, [IN, #(in_lo + 1)*4]
        vmul.f  s8, s0, s16                     @ vector operation: s8..s11 = a*c
        vldr    d10, [TSIN, #trig_lo*4]         @ s20,s21
        vldr    d11, [TSIN, #trig_hi*4]         @ s22,s23
        @ b operands
        vldr    s4, [IN, #in_lo*4]
        vldr    s5, [IN, #(in_lo + 2)*4]
        vldr    s6, [IN, #in_hi*4]
        vldr    s7, [IN, #(in_hi + 2)*4]
        @ one 32-bit load fetches two adjacent 16-bit REVTAB entries
        ldr     J0, [REVTAB, #trig_lo*2]
        vmul.f  s12, s0, s20                    @ vector operation: s12..s15 = a*s
        ldr     J2, [REVTAB, #trig_hi*2]
        mov     J1, J0, lsr #16                 @ high halfword: entry used for lane 1
        and     J0, J0, #255                    @ halfword value will be < n4
        vmls.f  s8, s4, s20                     @ vector operation: s8..s11 -= b*s
        mov     J3, J2, lsr #16                 @ high halfword: entry used for lane 3
        and     J2, J2, #255                    @ halfword value will be < n4
        add     J0, OUT, J0, lsl #3             @ 8 bytes per output element
        vmla.f  s12, s4, s16                    @ vector operation: s12..s15 += b*c
        add     J1, OUT, J1, lsl #3
        add     J2, OUT, J2, lsl #3
        add     J3, OUT, J3, lsl #3
        vstr    s8, [J0]
        vstr    s9, [J1]
        vstr    s10, [J2]
        vstr    s11, [J3]
        vstr    s12, [J0, #4]
        vstr    s13, [J1, #4]
        vstr    s14, [J2, #4]
        vstr    s15, [J3, #4]
 .set k, k + 2
.endm
  /*
   * postrotation_innerloop tail, head
   *
   * Software-pipelined step of the rotation applied after the FFT, working
   * in place on the buffer at OUT. Either argument may be left blank:
   *   head non-blank: load the operands for step k, start its two
   *                   multiplies, then advance k by 2;
   *   tail non-blank: finish the multiply-accumulates of step k-2 and
   *                   store its eight results.
   * When both are given, the tail of the previous step is interleaved with
   * the head of the current one.
   *
   * A step covers trig indices lo, lo+1 (counting down from n8) and
   * hi, hi+1 (counting up from n8). Per lane, with x = s0..s3 (float at
   * offset 0 of each 8-byte element), y = s4..s7 (float at offset 4),
   * s = TSIN value, c = TCOS value:
   *     s8..s11  = y*s - x*c    stored at offset 0 of lo, lo+1, hi, hi+1
   *     s12..s15 = x*s + y*c    stored at offset 4 of hi+1, hi, lo+1, lo
   *                             (that is, in reversed element order)
   *
   * The TCOS values alternate between d10/d11 and d12/d13 from one step to
   * the next, so that a head can load new TCOS values while the tail of the
   * previous step is still reading the old ones.
   *
   * Needs the symbols k and n8 to be .set by the caller and the length-4
   * short-vector mode to be active. Writes s0-s19 and s20-s27.
   * The instruction order is hand-interleaved; keep it as is.
   */
.macro postrotation_innerloop tail, head
 .set trig_lo_head, n8 - k - 2
 .set trig_hi_head, n8 + k
 .set out_lo_head, trig_lo_head * 2
 .set out_hi_head, trig_hi_head * 2
 .set trig_lo_tail, n8 - (k - 2) - 2
 .set trig_hi_tail, n8 + (k - 2)
 .set out_lo_tail, trig_lo_tail * 2
 .set out_hi_tail, trig_hi_tail * 2
 @ pick the TCOS register set for this head; the tail reads the other one
 .if (k & 2) == 0
  TCOS_D0_HEAD .req d10 @ s20,s21
  TCOS_D1_HEAD .req d11 @ s22,s23
  TCOS_S0_TAIL .req s24
 .else
  TCOS_D0_HEAD .req d12 @ s24,s25
  TCOS_D1_HEAD .req d13 @ s26,s27
  TCOS_S0_TAIL .req s20
 .endif
 .ifnc "\tail",""
        vmls.f  s8, s0, TCOS_S0_TAIL        @ vector operation: s8..s11 -= x*c
 .endif
 .ifnc "\head",""
        vldr    d8, [TSIN, #trig_lo_head*4] @ s16,s17
        vldr    d9, [TSIN, #trig_hi_head*4] @ s18,s19
        vldr    TCOS_D0_HEAD, [TCOS, #trig_lo_head*4]
 .endif
 .ifnc "\tail",""
        vmla.f  s12, s4, TCOS_S0_TAIL       @ vector operation: s12..s15 += y*c
 .endif
 .ifnc "\head",""
        @ x operands (offset 0 of each element), then y operands (offset 4)
        vldr    s0, [OUT, #out_lo_head*4]
        vldr    s1, [OUT, #(out_lo_head + 2)*4]
        vldr    s2, [OUT, #out_hi_head*4]
        vldr    s3, [OUT, #(out_hi_head + 2)*4]
        vldr    s4, [OUT, #(out_lo_head + 1)*4]
        vldr    s5, [OUT, #(out_lo_head + 3)*4]
        vldr    s6, [OUT, #(out_hi_head + 1)*4]
        vldr    s7, [OUT, #(out_hi_head + 3)*4]
 .endif
 .ifnc "\tail",""
        vstr    s8, [OUT, #out_lo_tail*4]
        vstr    s9, [OUT, #(out_lo_tail + 2)*4]
        vstr    s10, [OUT, #out_hi_tail*4]
        vstr    s11, [OUT, #(out_hi_tail + 2)*4]
 .endif
 .ifnc "\head",""
        vmul.f  s8, s4, s16                 @ vector operation: s8..s11 = y*s
 .endif
 .ifnc "\tail",""
        @ second result vector goes out in reversed element order
        vstr    s12, [OUT, #(out_hi_tail + 3)*4]
        vstr    s13, [OUT, #(out_hi_tail + 1)*4]
        vstr    s14, [OUT, #(out_lo_tail + 3)*4]
        vstr    s15, [OUT, #(out_lo_tail + 1)*4]
 .endif
 .ifnc "\head",""
        vmul.f  s12, s0, s16                @ vector operation: s12..s15 = x*s
        vldr    TCOS_D1_HEAD, [TCOS, #trig_hi_head*4]
 .endif
 @ release the aliases so the next expansion can bind them differently
 .unreq TCOS_D0_HEAD
 .unreq TCOS_D1_HEAD
 .unreq TCOS_S0_TAIL
 .ifnc "\head",""
  .set k, k + 2
 .endif
.endm
  /*
   * void ff_imdct_half_vfp(FFTContext *s,
   *                        FFTSample *output,
   *                        const FFTSample *input)
   *
   * VFP implementation of the half IMDCT for mdct_bits == 6 only (n = 64).
   * Any other size is forwarded, with its arguments untouched, to
   * ff_imdct_half_c.
   *
   * In:   a1 = s, a2 = output, a3 = input
   * Flow: 1. prerotation of the input into the output buffer, with the
   *          destination of each element chosen through REVTAB
   *          (n8/2 unrolled macro expansions);
   *       2. ff_fft16_vfp on the output buffer, called with the FPSCR
   *          value that was in force on entry;
   *       3. in-place postrotation of the output buffer, unrolled and
   *          software pipelined (head only, n8/2 - 1 combined, tail only).
   * Saves v1-v5, lr and s16-s27 on the stack and restores them, together
   * with FPSCR, before returning.
   *
   * The early-out goes through a local label and an unconditional branch
   * instead of a conditional branch straight to the external symbol: not
   * every toolchain can relocate a conditional branch to an external
   * symbol (clang targeting Mach-O rejects it outright).
   */
function ff_imdct_half_vfp, export=1
        ldr     ip, [CONTEXT, #5*4]         @ mdct_bits
        teq     ip, #6
        bne     10f                         @ only case currently accelerated is the one used by DCA

        @ transform size and derived counts, consumed by the macros at assembly time
        .set    n, 1<<6
        .set    n2, n/2
        .set    n4, n/4
        .set    n8, n/8

        push    {v1-v5,lr}
        vpush   {s16-s27}
        fmrx    OLDFPSCR, FPSCR
        ldr     lr, =0x03030000             @ RunFast mode, short vectors of length 4, stride 1
        fmxr    FPSCR, lr
        mov     OUT, ORIGOUT                @ a2 is about to be reused as J0
        ldr     REVTAB, [CONTEXT, #2*4]
        ldr     TCOS, [CONTEXT, #6*4]
        ldr     TSIN, [CONTEXT, #7*4]

        .set    k, 0
        .rept   n8/2
        prerotation_innerloop
        .endr

        fmxr    FPSCR, OLDFPSCR             @ leave short-vector mode around the call
        mov     a1, OUT
        bl      X(ff_fft16_vfp)
        ldr     lr, =0x03030000             @ RunFast mode, short vectors of length 4, stride 1
        fmxr    FPSCR, lr

        .set    k, 0
        postrotation_innerloop , head       @ pipeline prologue
        .rept   n8/2 - 1
        postrotation_innerloop tail, head
        .endr
        postrotation_innerloop tail         @ pipeline epilogue

        fmxr    FPSCR, OLDFPSCR
        vpop    {s16-s27}
        pop     {v1-v5,pc}

10:     b       X(ff_imdct_half_c)          @ tail call: nothing pushed yet, a1-a3 and lr still as on entry
endfunc
  /*
   * Release every register alias introduced at the top of this file so
   * that none of the names stays bound for code assembled after this
   * point.
   */
        .unreq  CONTEXT
        .unreq  ORIGOUT
        .unreq  IN
        .unreq  OUT
        .unreq  REVTAB
        .unreq  TCOS
        .unreq  TSIN
        .unreq  OLDFPSCR
        .unreq  J0
        .unreq  J1
        .unreq  J2
        .unreq  J3