|
|
|
@@ -0,0 +1,243 @@ |
|
|
|
/* |
|
|
|
* Copyright (c) 2013 RISC OS Open Ltd |
|
|
|
* Author: Ben Avison <bavison@riscosopen.org> |
|
|
|
* |
|
|
|
* This file is part of Libav. |
|
|
|
* |
|
|
|
* Libav is free software; you can redistribute it and/or |
|
|
|
* modify it under the terms of the GNU Lesser General Public |
|
|
|
* License as published by the Free Software Foundation; either |
|
|
|
* version 2.1 of the License, or (at your option) any later version. |
|
|
|
* |
|
|
|
* Libav is distributed in the hope that it will be useful, |
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
|
|
* Lesser General Public License for more details. |
|
|
|
* |
|
|
|
* You should have received a copy of the GNU Lesser General Public |
|
|
|
* License along with Libav; if not, write to the Free Software |
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
|
|
*/ |
|
|
|
|
|
|
|
#include "libavutil/arm/asm.S" |
|
|
|
|
|
|
|
@ ---------------------------------------------------------------------------
@ Core register aliases.
@
@ r0-r2 are aliased twice: the first three names describe the incoming
@ arguments (imdct, synth_buf_ptr, synth_buf_offset), the next three name
@ what the same registers hold once the main loops are running.
@ ---------------------------------------------------------------------------
IMDCT           .req    r0      @ arg 1: FFTContext *imdct (passed straight on)
ORIG_P_SB       .req    r1      @ arg 2: synth_buf_ptr
P_SB_OFF        .req    r2      @ arg 3: pointer to synth_buf_offset
I               .req    r0      @ outer loop counter
P_SB2_UP        .req    r1      @ ascending pointer into synth_buf2
OLDFPSCR        .req    r2      @ FPSCR value saved on entry to vector mode
P_SB2_DN        .req    r3      @ descending pointer into synth_buf2
P_WIN_DN        .req    r4      @ window pointer that steps downwards
P_OUT_DN        .req    r5      @ descending pointer into out
P_SB            .req    r6      @ current position in synth_buf
J_WRAP          .req    r7      @ value of J at which P_SB must wrap
P_WIN_UP        .req    r12     @ window pointer that steps upwards
P_OUT_UP        .req    r14     @ ascending pointer into out

@ ---------------------------------------------------------------------------
@ VFP register aliases. Only the first (and, for accumulators, last)
@ register of each 4-element group is named; the groups are used as
@ short vectors of length 4.
@
@ VA/VC share s8-s11 and VB/VD share s12-s15: the a/b accumulators are
@ only live in the first loop, the c/d accumulators only in the second.
@ ---------------------------------------------------------------------------
SCALE           .req    s0      @ output scale factor
SBUF_DAT_REV0   .req    s4      @ s4-s7: synth_buf data in reversed order
SBUF_DAT_REV1   .req    s5
SBUF_DAT_REV2   .req    s6
SBUF_DAT_REV3   .req    s7
VA0             .req    s8      @ s8-s11: accumulator a
VA3             .req    s11
VB0             .req    s12     @ s12-s15: accumulator b
VB3             .req    s15
VC0             .req    s8      @ s8-s11: accumulator c (= d4,d5)
VC3             .req    s11
VD0             .req    s12     @ s12-s15: accumulator d (= d6,d7)
VD3             .req    s15
SBUF_DAT0       .req    s16     @ s16-s19 (= d8,d9): synth_buf data
SBUF_DAT1       .req    s17
SBUF_DAT2       .req    s18
SBUF_DAT3       .req    s19
SBUF_DAT_ALT0   .req    s20     @ s20-s23 (= d10,d11): alternate synth_buf data
SBUF_DAT_ALT1   .req    s21
SBUF_DAT_ALT2   .req    s22
SBUF_DAT_ALT3   .req    s23
WIN_DN_DAT0     .req    s24     @ s24-s27 (= d12,d13): data from P_WIN_DN
WIN_UP_DAT0     .req    s28     @ s28-s31 (= d14,d15): data from P_WIN_UP
|
|
|
|
|
|
|
|
|
|
|
@ ---------------------------------------------------------------------------
@ inner_loop half, tail, head
@
@ One software-pipelined step of the windowed multiply-accumulate.
@
@   half  "ab" selects the VA/VB accumulators; any other value selects
@         the VC/VD accumulators.
@   tail  if non-empty, emit the second half of the previous step: the
@         reversed synth_buf vector times the WIN_DN vector, subtracted
@         from VA (half ab) or added to VD (otherwise).
@   head  if non-empty, emit the first half of a new step: load synth_buf
@         and both window vectors, build the reversed copy, accumulate
@         synth_buf times WIN_UP into VB or VC, wrap P_SB if needed, and
@         advance the J and OFFSET symbols.
@
@ The assembler symbols J and OFFSET must be defined with .set before the
@ first expansion. Each head step lowers J by 64 and raises OFFSET by 64*4.
@ Bit 8 of OFFSET decides whether this expansion uses SBUF_DAT (d8,d9) or
@ SBUF_DAT_ALT (d10,d11), so successive expansions alternate between them.
@
@ Instruction order is deliberate (loads are interleaved with the
@ multiply-accumulates); do not reorder.
@ ---------------------------------------------------------------------------
.macro inner_loop  half, tail, head
 .if (OFFSET & (64*4)) == 0                @ even numbered call
        SBUF_DAT_THIS0 .req SBUF_DAT0
        SBUF_DAT_THIS1 .req SBUF_DAT1
        SBUF_DAT_THIS2 .req SBUF_DAT2
        SBUF_DAT_THIS3 .req SBUF_DAT3
  .ifnc "\head",""
        vldr    d8, [P_SB, #OFFSET]        @ d8 = SBUF_DAT
        vldr    d9, [P_SB, #OFFSET+8]
  .endif
 .else                                     @ odd numbered call
        SBUF_DAT_THIS0 .req SBUF_DAT_ALT0
        SBUF_DAT_THIS1 .req SBUF_DAT_ALT1
        SBUF_DAT_THIS2 .req SBUF_DAT_ALT2
        SBUF_DAT_THIS3 .req SBUF_DAT_ALT3
  .ifnc "\head",""
        vldr    d10, [P_SB, #OFFSET]       @ d10 = SBUF_DAT_ALT
        vldr    d11, [P_SB, #OFFSET+8]
  .endif
 .endif
 .ifnc "\tail",""
  @ Uses SBUF_DAT_REV and WIN_DN_DAT as left by the preceding head step.
  .ifc "\half","ab"
        vmls.f  VA0, SBUF_DAT_REV0, WIN_DN_DAT0  @ all operands treated as vectors
  .else
        vmla.f  VD0, SBUF_DAT_REV0, WIN_DN_DAT0  @ all operands treated as vectors
  .endif
 .endif
 .ifnc "\head",""
        vldr    d14, [P_WIN_UP, #OFFSET]   @ d14 = WIN_UP_DAT
        vldr    d15, [P_WIN_UP, #OFFSET+8]
        vldr    d12, [P_WIN_DN, #OFFSET]   @ d12 = WIN_DN_DAT
        vldr    d13, [P_WIN_DN, #OFFSET+8]
        vmov    SBUF_DAT_REV3, SBUF_DAT_THIS0    @ REV3..REV0 = THIS0..THIS3
        vmov    SBUF_DAT_REV2, SBUF_DAT_THIS1
        vmov    SBUF_DAT_REV1, SBUF_DAT_THIS2
        vmov    SBUF_DAT_REV0, SBUF_DAT_THIS3
  .ifc "\half","ab"
        vmla.f  VB0, SBUF_DAT_THIS0, WIN_UP_DAT0
  .else
        vmla.f  VC0, SBUF_DAT_THIS0, WIN_UP_DAT0
  .endif
        teq     J_WRAP, #J
        bne     2f             @ strongly predictable, so better than cond exec in this case
        sub     P_SB, P_SB, #512*4         @ step back by the 512-float buffer size
2:
  .set J, J - 64
  .set OFFSET, OFFSET + 64*4
 .endif
        @ Release the per-expansion aliases so the next expansion can rebind them.
        .unreq  SBUF_DAT_THIS0
        .unreq  SBUF_DAT_THIS1
        .unreq  SBUF_DAT_THIS2
        .unreq  SBUF_DAT_THIS3
.endm
|
|
|
|
|
|
|
|
|
|
|
/* void ff_synth_filter_float_vfp(FFTContext *imdct,
 *                                float *synth_buf_ptr, int *synth_buf_offset,
 *                                float synth_buf2[32], const float window[512],
 *                                float out[32], const float in[32], float scale)
 *
 * Register / stack interface as used below:
 *   r0 = imdct, r1 = synth_buf_ptr, r2 = synth_buf_offset, r3 = synth_buf2
 *   stacked words 0..2 = window, out, in
 *   scale = s0 (hard-float, VFP lines) or stacked word 3 (NOVFP lines)
 *
 * Steps:
 *   1. Run the half IMDCT of in[] into synth_buf at the current offset and
 *      store the offset for the next call (decremented by 32, masked to
 *      the 512-entry buffer).
 *   2. Switch the FPSCR to RunFast mode with short vectors of length 4.
 *   3. First pass (4 iterations): accumulate into VA/VB starting from
 *      synth_buf2, scale, and store to out[].
 *   4. Second pass (4 iterations): accumulate into VC/VD starting from
 *      zero and store to synth_buf2[].
 *   5. Restore the FPSCR and the saved registers.
 *
 * Saves and restores r4-r7 and s16-s31; r3 is pushed as well so that
 * synth_buf2 can be reloaded after the call. Stack offsets of the form
 * (16+6+n)*4 skip the 16 words of vpush and the 6 words of push.
 *
 * NOTE(review): the external call is wrapped in X() so that it receives the
 * same external-symbol prefix as exported functions. X() is expected to be
 * provided by libavutil/arm/asm.S - confirm.
 */
function ff_synth_filter_float_vfp, export=1
        push    {r3-r7,lr}
        vpush   {s16-s31}
        ldr     lr, [P_SB_OFF]
        add     a2, ORIG_P_SB, lr, LSL #2 @ calculate synth_buf to pass to imdct_half
        mov     P_SB, a2                  @ and keep a copy for ourselves
        bic     J_WRAP, lr, #63           @ mangled to make testing for wrap easier in inner loop
        sub     lr, lr, #32
        and     lr, lr, #512-32
        str     lr, [P_SB_OFF]            @ rotate offset, modulo buffer size, ready for next call
        ldr     a3, [sp, #(16+6+2)*4]     @ fetch in from stack, to pass to imdct_half
VFP     vmov    s16, SCALE                @ imdct_half is free to corrupt s0, but it contains one of our arguments in hardfp case
        bl      X(ff_imdct_half_c)        @ external C symbol, so apply the symbol prefix
VFP     vmov    SCALE, s16

        fmrx    OLDFPSCR, FPSCR
        ldr     lr, =0x03030000           @ RunFast mode, short vectors of length 4, stride 1
        fmxr    FPSCR, lr
        ldr     P_SB2_DN, [sp, #16*4]     @ synth_buf2: the r3 value pushed on entry
        ldr     P_WIN_DN, [sp, #(16+6+0)*4] @ window
        ldr     P_OUT_DN, [sp, #(16+6+1)*4] @ out
NOVFP   vldr    SCALE, [sp, #(16+6+3)*4]  @ scale comes from the stack in the soft-float case

#define IMM_OFF_SKEW 956                   /* also valid immediate constant when you add 16*4 */
        add     P_SB, P_SB, #IMM_OFF_SKEW  @ so we can use -ve offsets to use full immediate offset range
        add     P_SB2_UP, P_SB2_DN, #16*4
        add     P_WIN_UP, P_WIN_DN, #16*4+IMM_OFF_SKEW
        add     P_OUT_UP, P_OUT_DN, #16*4
        add     P_SB2_DN, P_SB2_DN, #16*4
        add     P_WIN_DN, P_WIN_DN, #12*4+IMM_OFF_SKEW
        add     P_OUT_DN, P_OUT_DN, #16*4
        mov     I, #4
        @ First pass: the UP pointers climb from element 16, the DN pointers
        @ descend from element 16, four floats per iteration.
1:
        vldmia  P_SB2_UP!, {VB0-VB3}
        vldmdb  P_SB2_DN!, {VA0-VA3}
 .set J, 512 - 64
 .set OFFSET, -IMM_OFF_SKEW
        inner_loop  ab,, head
 .rept 7
        inner_loop  ab, tail, head
 .endr
        inner_loop  ab, tail
        add     P_WIN_UP, P_WIN_UP, #4*4
        sub     P_WIN_DN, P_WIN_DN, #4*4
        vmul.f  VB0, VB0, SCALE      @ SCALE treated as scalar
        add     P_SB, P_SB, #(512+4)*4
        subs    I, I, #1
        vmul.f  VA0, VA0, SCALE
        vstmia  P_OUT_UP!, {VB0-VB3}
        vstmdb  P_OUT_DN!, {VA0-VA3}
        bne     1b

        @ Re-aim the synth_buf2 pointers for the second pass: P_SB2_DN moves
        @ from element 0 up to element 32, P_SB2_UP from element 32 back to
        @ element 0. P_WIN_DN is moved on by 64 floats.
        add     P_SB2_DN, P_SB2_DN, #(16+28-12)*4
        sub     P_SB2_UP, P_SB2_UP, #(16+16)*4
        add     P_WIN_DN, P_WIN_DN, #(32+16+28-12)*4
        mov     I, #4
        @ Second pass: accumulators start from zero, results go to synth_buf2.
1:
        vldr.d  d4, zero             @ d4 = VC0
        vldr.d  d5, zero
        vldr.d  d6, zero             @ d6 = VD0
        vldr.d  d7, zero
 .set J, 512 - 64
 .set OFFSET, -IMM_OFF_SKEW
        inner_loop  cd,, head
 .rept 7
        inner_loop  cd, tail, head
 .endr
        inner_loop  cd, tail
        add     P_WIN_UP, P_WIN_UP, #4*4
        sub     P_WIN_DN, P_WIN_DN, #4*4
        add     P_SB, P_SB, #(512+4)*4
        subs    I, I, #1
        vstmia  P_SB2_UP!, {VC0-VC3}
        vstmdb  P_SB2_DN!, {VD0-VD3}
        bne     1b

        fmxr    FPSCR, OLDFPSCR      @ put back the FPSCR value read before the loops
        vpop    {s16-s31}
        pop     {r3-r7,pc}
endfunc
|
|
|
|
|
|
|
@ Release every register alias defined at the top of this file.

        @ core registers
        .unreq  IMDCT
        .unreq  ORIG_P_SB
        .unreq  P_SB_OFF
        .unreq  I
        .unreq  P_SB2_UP
        .unreq  OLDFPSCR
        .unreq  P_SB2_DN
        .unreq  P_WIN_DN
        .unreq  P_OUT_DN
        .unreq  P_SB
        .unreq  J_WRAP
        .unreq  P_WIN_UP
        .unreq  P_OUT_UP

        @ VFP registers
        .unreq  SCALE
        .unreq  SBUF_DAT_REV0
        .unreq  SBUF_DAT_REV1
        .unreq  SBUF_DAT_REV2
        .unreq  SBUF_DAT_REV3
        .unreq  VA0
        .unreq  VA3
        .unreq  VB0
        .unreq  VB3
        .unreq  VC0
        .unreq  VC3
        .unreq  VD0
        .unreq  VD3
        .unreq  SBUF_DAT0
        .unreq  SBUF_DAT1
        .unreq  SBUF_DAT2
        .unreq  SBUF_DAT3
        .unreq  SBUF_DAT_ALT0
        .unreq  SBUF_DAT_ALT1
        .unreq  SBUF_DAT_ALT2
        .unreq  SBUF_DAT_ALT3
        .unreq  WIN_DN_DAT0
        .unreq  WIN_UP_DAT0
|
|
|
|
|
|
|
.align 3 |
|
|
|
zero: .word 0, 0 |