| @@ -52,11 +52,13 @@ ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \ | |||||
| arm/vp8dsp_init_armv6.o \ | arm/vp8dsp_init_armv6.o \ | ||||
| arm/vp8dsp_armv6.o | arm/vp8dsp_armv6.o | ||||
| VFP-OBJS += arm/fmtconvert_vfp.o | |||||
| VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \ | VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \ | ||||
| arm/synth_filter_vfp.o | arm/synth_filter_vfp.o | ||||
| VFP-OBJS-$(CONFIG_FFT) += arm/fft_vfp.o | VFP-OBJS-$(CONFIG_FFT) += arm/fft_vfp.o | ||||
| VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o | VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o | ||||
| VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o | |||||
| VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp_armv6.o | |||||
| NEON-OBJS += arm/fmtconvert_neon.o | NEON-OBJS += arm/fmtconvert_neon.o | ||||
| @@ -43,16 +43,15 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx | |||||
| { | { | ||||
| int cpu_flags = av_get_cpu_flags(); | int cpu_flags = av_get_cpu_flags(); | ||||
| if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) { | |||||
| if (have_vfp(cpu_flags)) { | |||||
| if (!have_vfpv3(cpu_flags)) { | if (!have_vfpv3(cpu_flags)) { | ||||
| // These functions don't use anything armv6 specific in themselves, | |||||
| // but ff_float_to_int16_vfp which is in the same assembly source | |||||
| // file does, thus the whole file requires armv6 to be built. | |||||
| c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp; | c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp; | ||||
| c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp; | c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp; | ||||
| } | } | ||||
| c->float_to_int16 = ff_float_to_int16_vfp; | |||||
| if (have_armv6(cpu_flags)) { | |||||
| c->float_to_int16 = ff_float_to_int16_vfp; | |||||
| } | |||||
| } | } | ||||
| if (have_neon(cpu_flags)) { | if (have_neon(cpu_flags)) { | ||||
| @@ -1,5 +1,4 @@ | |||||
| /* | /* | ||||
| * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net> | |||||
| * Copyright (c) 2013 RISC OS Open Ltd <bavison@riscosopen.org> | * Copyright (c) 2013 RISC OS Open Ltd <bavison@riscosopen.org> | ||||
| * | * | ||||
| * This file is part of Libav. | * This file is part of Libav. | ||||
| @@ -22,62 +21,6 @@ | |||||
| #include "config.h" | #include "config.h" | ||||
| #include "libavutil/arm/asm.S" | #include "libavutil/arm/asm.S" | ||||
| /** | |||||
| * ARM VFP optimized float to int16 conversion. | |||||
| * Each float is converted to a signed 32-bit integer and then clamped to the | |||||
| * signed 16-bit range; no scaling is applied. Eight samples are processed | |||||
| * per loop iteration. | |||||
| * Assume that len is a positive number and is multiple of 8, destination | |||||
| * buffer is at least 4 bytes aligned (8 bytes alignment is better for | |||||
| * performance), little-endian byte order. | |||||
| */ | |||||
| @ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len) | |||||
| @ Register use below: r0 = dst, r1 = src, r2 = remaining sample count. | |||||
| function ff_float_to_int16_vfp, export=1 | |||||
| @ Save the core and VFP registers used as scratch (s16-s23 are d8-d11). | |||||
| push {r4-r8,lr} | |||||
| vpush {d8-d11} | |||||
| @ Prologue: load the first 8 floats and convert them to signed 32-bit ints. | |||||
| vldmia r1!, {s16-s23} | |||||
| vcvt.s32.f32 s0, s16 | |||||
| vcvt.s32.f32 s1, s17 | |||||
| vcvt.s32.f32 s2, s18 | |||||
| vcvt.s32.f32 s3, s19 | |||||
| vcvt.s32.f32 s4, s20 | |||||
| vcvt.s32.f32 s5, s21 | |||||
| vcvt.s32.f32 s6, s22 | |||||
| vcvt.s32.f32 s7, s23 | |||||
| 1: | |||||
| @ Count down 8 samples. The flags set here drive every "gt" condition in | |||||
| @ this iteration, including the closing branch: no instruction in between | |||||
| @ modifies the condition flags. | |||||
| subs r2, r2, #8 | |||||
| @ Move the 8 converted values to core registers for saturation and packing. | |||||
| vmov r3, r4, s0, s1 | |||||
| vmov r5, r6, s2, s3 | |||||
| vmov r7, r8, s4, s5 | |||||
| vmov ip, lr, s6, s7 | |||||
| @ If more input remains, load the next 8 floats now. | |||||
| it gt | |||||
| vldmiagt r1!, {s16-s23} | |||||
| @ Clamp samples 0-3 to the signed 16-bit range and pack them pairwise, | |||||
| @ the earlier sample in the low halfword of each word. | |||||
| ssat r4, #16, r4 | |||||
| ssat r3, #16, r3 | |||||
| ssat r6, #16, r6 | |||||
| ssat r5, #16, r5 | |||||
| pkhbt r3, r3, r4, lsl #16 | |||||
| pkhbt r4, r5, r6, lsl #16 | |||||
| @ Convert the next batch (only when one was loaded), interleaved with the | |||||
| @ remaining integer work of the current batch. | |||||
| itttt gt | |||||
| vcvtgt.s32.f32 s0, s16 | |||||
| vcvtgt.s32.f32 s1, s17 | |||||
| vcvtgt.s32.f32 s2, s18 | |||||
| vcvtgt.s32.f32 s3, s19 | |||||
| itttt gt | |||||
| vcvtgt.s32.f32 s4, s20 | |||||
| vcvtgt.s32.f32 s5, s21 | |||||
| vcvtgt.s32.f32 s6, s22 | |||||
| vcvtgt.s32.f32 s7, s23 | |||||
| @ Clamp and pack samples 4-7 the same way. | |||||
| ssat r8, #16, r8 | |||||
| ssat r7, #16, r7 | |||||
| ssat lr, #16, lr | |||||
| ssat ip, #16, ip | |||||
| pkhbt r5, r7, r8, lsl #16 | |||||
| pkhbt r6, ip, lr, lsl #16 | |||||
| @ Store the 8 packed int16 samples (16 bytes) and advance dst. | |||||
| stmia r0!, {r3-r6} | |||||
| bgt 1b | |||||
| @ Restore the saved registers; popping into pc returns to the caller. | |||||
| vpop {d8-d11} | |||||
| pop {r4-r8,pc} | |||||
| endfunc | |||||
| /** | /** | ||||
| * ARM VFP optimised int32 to float conversion. | * ARM VFP optimised int32 to float conversion. | ||||
| * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned | * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned | ||||
| @@ -0,0 +1,78 @@ | |||||
| /* | |||||
| * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net> | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "config.h" | |||||
| #include "libavutil/arm/asm.S" | |||||
| /** | |||||
| * ARM VFP optimized float to int16 conversion. | |||||
| * Each float is converted to a signed 32-bit integer and then clamped to the | |||||
| * signed 16-bit range; no scaling is applied. Eight samples are processed | |||||
| * per loop iteration. | |||||
| * Assume that len is a positive number and is multiple of 8, destination | |||||
| * buffer is at least 4 bytes aligned (8 bytes alignment is better for | |||||
| * performance), little-endian byte order. | |||||
| */ | |||||
| @ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len) | |||||
| @ Register use below: r0 = dst, r1 = src, r2 = remaining sample count. | |||||
| function ff_float_to_int16_vfp, export=1 | |||||
| @ Save the core and VFP registers used as scratch (s16-s23 are d8-d11). | |||||
| push {r4-r8,lr} | |||||
| vpush {d8-d11} | |||||
| @ Prologue: load the first 8 floats and convert them to signed 32-bit ints. | |||||
| vldmia r1!, {s16-s23} | |||||
| vcvt.s32.f32 s0, s16 | |||||
| vcvt.s32.f32 s1, s17 | |||||
| vcvt.s32.f32 s2, s18 | |||||
| vcvt.s32.f32 s3, s19 | |||||
| vcvt.s32.f32 s4, s20 | |||||
| vcvt.s32.f32 s5, s21 | |||||
| vcvt.s32.f32 s6, s22 | |||||
| vcvt.s32.f32 s7, s23 | |||||
| 1: | |||||
| @ Count down 8 samples. The flags set here drive every "gt" condition in | |||||
| @ this iteration, including the closing branch: no instruction in between | |||||
| @ modifies the condition flags. | |||||
| subs r2, r2, #8 | |||||
| @ Move the 8 converted values to core registers for saturation and packing. | |||||
| vmov r3, r4, s0, s1 | |||||
| vmov r5, r6, s2, s3 | |||||
| vmov r7, r8, s4, s5 | |||||
| vmov ip, lr, s6, s7 | |||||
| @ If more input remains, load the next 8 floats now. | |||||
| it gt | |||||
| vldmiagt r1!, {s16-s23} | |||||
| @ Clamp samples 0-3 to the signed 16-bit range and pack them pairwise, | |||||
| @ the earlier sample in the low halfword of each word. | |||||
| ssat r4, #16, r4 | |||||
| ssat r3, #16, r3 | |||||
| ssat r6, #16, r6 | |||||
| ssat r5, #16, r5 | |||||
| pkhbt r3, r3, r4, lsl #16 | |||||
| pkhbt r4, r5, r6, lsl #16 | |||||
| @ Convert the next batch (only when one was loaded), interleaved with the | |||||
| @ remaining integer work of the current batch. | |||||
| itttt gt | |||||
| vcvtgt.s32.f32 s0, s16 | |||||
| vcvtgt.s32.f32 s1, s17 | |||||
| vcvtgt.s32.f32 s2, s18 | |||||
| vcvtgt.s32.f32 s3, s19 | |||||
| itttt gt | |||||
| vcvtgt.s32.f32 s4, s20 | |||||
| vcvtgt.s32.f32 s5, s21 | |||||
| vcvtgt.s32.f32 s6, s22 | |||||
| vcvtgt.s32.f32 s7, s23 | |||||
| @ Clamp and pack samples 4-7 the same way. | |||||
| ssat r8, #16, r8 | |||||
| ssat r7, #16, r7 | |||||
| ssat lr, #16, lr | |||||
| ssat ip, #16, ip | |||||
| pkhbt r5, r7, r8, lsl #16 | |||||
| pkhbt r6, ip, lr, lsl #16 | |||||
| @ Store the 8 packed int16 samples (16 bytes) and advance dst. | |||||
| stmia r0!, {r3-r6} | |||||
| bgt 1b | |||||
| @ Restore the saved registers; popping into pc returns to the caller. | |||||
| vpop {d8-d11} | |||||
| pop {r4-r8,pc} | |||||
| endfunc | |||||