/*
 * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
 * Copyright (c) 2013 RISC OS Open Ltd <bavison@riscosopen.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "libavutil/arm/asm.S"

/**
 * ARM VFP optimized float to int16 conversion.
 * Assumes that len is a positive multiple of 8, that the destination buffer
 * is at least 4-byte aligned (8-byte alignment is better for performance),
 * and that the byte order is little-endian.
 */
@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
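/*
 * For reference, a minimal C sketch of the conversion contract implemented
 * below (an illustration only, not the actual Libav C fallback; the names
 * here are hypothetical).  Like vcvt.s32.f32 without the R suffix, the cast
 * truncates towards zero, and the clamp mirrors what ssat #16 does:
 *
 *     #include <stdint.h>
 *
 *     static void float_to_int16_ref(int16_t *dst, const float *src, int len)
 *     {
 *         int i;
 *         for (i = 0; i < len; i++) {
 *             int v = (int)src[i];
 *             if (v >  32767) v =  32767;
 *             if (v < -32768) v = -32768;
 *             dst[i] = v;
 *         }
 *     }
 */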
function ff_float_to_int16_vfp, export=1
        push    {r4-r8,lr}
        vpush   {d8-d11}
        vldmia  r1!, {s16-s23}          @ load the first 8 floats
        vcvt.s32.f32 s0, s16            @ convert them to int32 before entering the loop
        vcvt.s32.f32 s1, s17
        vcvt.s32.f32 s2, s18
        vcvt.s32.f32 s3, s19
        vcvt.s32.f32 s4, s20
        vcvt.s32.f32 s5, s21
        vcvt.s32.f32 s6, s22
        vcvt.s32.f32 s7, s23
1:
        subs    r2, r2, #8              @ 8 samples per iteration
        vmov    r3, r4, s0, s1          @ move the converted values to core registers
        vmov    r5, r6, s2, s3
        vmov    r7, r8, s4, s5
        vmov    ip, lr, s6, s7
        it      gt
        vldmiagt r1!, {s16-s23}         @ load the next 8 floats early if more remain
        ssat    r4, #16, r4             @ saturate each value to the int16 range
        ssat    r3, #16, r3
        ssat    r6, #16, r6
        ssat    r5, #16, r5
        pkhbt   r3, r3, r4, lsl #16     @ pack pairs of int16 samples into words
        pkhbt   r4, r5, r6, lsl #16
        itttt   gt
        vcvtgt.s32.f32 s0, s16          @ overlap the next batch of conversions
        vcvtgt.s32.f32 s1, s17          @ with the integer packing below
        vcvtgt.s32.f32 s2, s18
        vcvtgt.s32.f32 s3, s19
        itttt   gt
        vcvtgt.s32.f32 s4, s20
        vcvtgt.s32.f32 s5, s21
        vcvtgt.s32.f32 s6, s22
        vcvtgt.s32.f32 s7, s23
        ssat    r8, #16, r8
        ssat    r7, #16, r7
        ssat    lr, #16, lr
        ssat    ip, #16, ip
        pkhbt   r5, r7, r8, lsl #16
        pkhbt   r6, ip, lr, lsl #16
        stmia   r0!, {r3-r6}            @ store 8 int16 samples (16 bytes)
        bgt     1b

        vpop    {d8-d11}
        pop     {r4-r8,pc}
endfunc

/**
 * ARM VFP optimised int32 to float conversion.
 * Assumes that len is a multiple of 8, that the destination buffer is at
 * least 4-byte aligned (16-byte alignment is best for BCM2835), and that
 * the byte order is little-endian.
 */
@ void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src, float mul, int len)
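/*
 * For reference, a minimal C sketch of what this routine computes (an
 * illustration of the contract only, not the actual Libav C fallback; the
 * name is hypothetical):
 *
 *     #include <stdint.h>
 *
 *     static void int32_to_float_fmul_scalar_ref(float *dst, const int32_t *src,
 *                                                float mul, int len)
 *     {
 *         int i;
 *         for (i = 0; i < len; i++)
 *             dst[i] = src[i] * mul;
 *     }
 */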
function ff_int32_to_float_fmul_scalar_vfp, export=1
VFP     tmp     .req    a4              @ hard-float ABI: mul is passed in s0, len in a3
VFP     len     .req    a3
NOVFP   tmp     .req    a3              @ soft-float ABI: mul is passed in a3, len in a4
NOVFP   len     .req    a4
NOVFP   vmov    s0, a3
        ldr     tmp, =0x03070000        @ RunFast mode, short vectors of length 8, stride 1
        fmrx    ip, FPSCR               @ save the caller's FPSCR
        fmxr    FPSCR, tmp
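        @ A rough decode of the FPSCR value written above, per the VFPv2
        @ FPSCR layout (a sketch of the intent, not normative): bits 25 and
        @ 24 set DN and FZ (default NaN and flush-to-zero, which together
        @ with all exception traps disabled selects RunFast mode on cores
        @ such as the ARM11), LEN (bits 18:16) = 7 selects a vector length
        @ of 8, and STRIDE (bits 21:20) = 0 selects a stride of 1.  In this
        @ mode a data-processing op whose destination lies in s8-s31 acts on
        @ that many consecutive registers, with bank-0 operands (s0-s7)
        @ treated as scalars, so the single vmul.f32 below scales s8-s15 by
        @ s0; the vcvt conversions are not vectorised, hence eight of them.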
1:
        vldmia          a2!, {s8-s15}   @ load 8 int32 values
        vcvt.f32.s32    s8, s8          @ convert each one to float
        vcvt.f32.s32    s9, s9
        vcvt.f32.s32    s10, s10
        vcvt.f32.s32    s11, s11
        vcvt.f32.s32    s12, s12
        vcvt.f32.s32    s13, s13
        vcvt.f32.s32    s14, s14
        vcvt.f32.s32    s15, s15
        vmul.f32        s8, s8, s0      @ short vector op: scales s8-s15 by s0
        subs            len, len, #8
        vstmia          a1!, {s8-s11}   @ store the 8 results
        vstmia          a1!, {s12-s15}
        bne             1b

        fmxr            FPSCR, ip       @ restore the caller's FPSCR
        bx              lr
endfunc
        .unreq  tmp
        .unreq  len