| @@ -52,11 +52,13 @@ ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \ | |||
| arm/vp8dsp_init_armv6.o \ | |||
| arm/vp8dsp_armv6.o | |||
| VFP-OBJS += arm/fmtconvert_vfp.o | |||
| VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \ | |||
| arm/synth_filter_vfp.o | |||
| VFP-OBJS-$(CONFIG_FFT) += arm/fft_vfp.o | |||
| VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o | |||
| VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o | |||
| VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp_armv6.o | |||
| NEON-OBJS += arm/fmtconvert_neon.o | |||
| @@ -43,16 +43,15 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx | |||
| { | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) { | |||
| if (have_vfp(cpu_flags)) { | |||
| if (!have_vfpv3(cpu_flags)) { | |||
| // These functions don't use anything armv6 specific in themselves, | |||
| // but ff_float_to_int16_vfp which is in the same assembly source | |||
| // file does, thus the whole file requires armv6 to be built. | |||
| c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp; | |||
| c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp; | |||
| } | |||
| c->float_to_int16 = ff_float_to_int16_vfp; | |||
| if (have_armv6(cpu_flags)) { | |||
| c->float_to_int16 = ff_float_to_int16_vfp; | |||
| } | |||
| } | |||
| if (have_neon(cpu_flags)) { | |||
| @@ -1,5 +1,4 @@ | |||
| /* | |||
| * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net> | |||
| * Copyright (c) 2013 RISC OS Open Ltd <bavison@riscosopen.org> | |||
| * | |||
| * This file is part of Libav. | |||
| @@ -22,62 +21,6 @@ | |||
| #include "config.h" | |||
| #include "libavutil/arm/asm.S" | |||
| /** | |||
| * ARM VFP optimized float to int16 conversion. | |||
| * | |||
| * Converts floats from src to 32-bit integers with vcvt, saturates each | |||
| * value to the signed 16-bit range with ssat, packs pairs into words with | |||
| * pkhbt and stores eight int16 results (four words) per iteration to dst. | |||
| * The loop is software-pipelined: the load and float->int conversion of | |||
| * the next eight inputs are interleaved with the saturate/pack of the | |||
| * current eight. | |||
| * | |||
| * Assume that len is a positive number and is a multiple of 8 (the first | |||
| * eight floats are loaded before len is ever tested), destination | |||
| * buffer is at least 4 bytes aligned (8 bytes alignment is better for | |||
| * performance), little-endian byte order. | |||
| * | |||
| * Register use: r0 = dst (post-incremented by the store), r1 = src | |||
| * (post-incremented by the loads), r2 = number of floats still to process. | |||
| */ | |||
| @ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len) | |||
| function ff_float_to_int16_vfp, export=1 | |||
| @ Save the core registers used as scratch below, plus lr for the return. | |||
| push {r4-r8,lr} | |||
| @ Save d8-d11, which alias s16-s23 and are overwritten by the loads. | |||
| vpush {d8-d11} | |||
| @ Prologue: load the first eight floats and convert them to int32 in s0-s7. | |||
| vldmia r1!, {s16-s23} | |||
| vcvt.s32.f32 s0, s16 | |||
| vcvt.s32.f32 s1, s17 | |||
| vcvt.s32.f32 s2, s18 | |||
| vcvt.s32.f32 s3, s19 | |||
| vcvt.s32.f32 s4, s20 | |||
| vcvt.s32.f32 s5, s21 | |||
| vcvt.s32.f32 s6, s22 | |||
| vcvt.s32.f32 s7, s23 | |||
| 1: | |||
| @ len -= 8. The flags set here drive every gt-conditional instruction in | |||
| @ this iteration and the final bgt; nothing in between uses an s-suffixed | |||
| @ (flag-setting) form. | |||
| subs r2, r2, #8 | |||
| @ Move the eight converted integers from s0-s7 into core registers. | |||
| vmov r3, r4, s0, s1 | |||
| vmov r5, r6, s2, s3 | |||
| vmov r7, r8, s4, s5 | |||
| vmov ip, lr, s6, s7 | |||
| @ If more input remains, start loading the next eight floats now. | |||
| @ (it/itttt cover the following gt-conditional instructions, as needed | |||
| @ when this is assembled as Thumb-2.) | |||
| it gt | |||
| vldmiagt r1!, {s16-s23} | |||
| @ Saturate samples 0-3 to the signed 16-bit range. | |||
| ssat r4, #16, r4 | |||
| ssat r3, #16, r3 | |||
| ssat r6, #16, r6 | |||
| ssat r5, #16, r5 | |||
| @ Pack pairs: low halfword from the first operand, high halfword from the | |||
| @ second shifted left by 16. r4 is reused as a destination once its old | |||
| @ value has been consumed by the first pkhbt. | |||
| pkhbt r3, r3, r4, lsl #16 | |||
| pkhbt r4, r5, r6, lsl #16 | |||
| @ If more input remains, convert the next eight floats into s0-s7 | |||
| @ (already copied out to core registers above, so safe to overwrite). | |||
| itttt gt | |||
| vcvtgt.s32.f32 s0, s16 | |||
| vcvtgt.s32.f32 s1, s17 | |||
| vcvtgt.s32.f32 s2, s18 | |||
| vcvtgt.s32.f32 s3, s19 | |||
| itttt gt | |||
| vcvtgt.s32.f32 s4, s20 | |||
| vcvtgt.s32.f32 s5, s21 | |||
| vcvtgt.s32.f32 s6, s22 | |||
| vcvtgt.s32.f32 s7, s23 | |||
| @ Saturate samples 4-7 to the signed 16-bit range. | |||
| ssat r8, #16, r8 | |||
| ssat r7, #16, r7 | |||
| ssat lr, #16, lr | |||
| ssat ip, #16, ip | |||
| @ Pack samples 4-5 into r5 and 6-7 into r6 (both free again by now). | |||
| pkhbt r5, r7, r8, lsl #16 | |||
| pkhbt r6, ip, lr, lsl #16 | |||
| @ Store the eight packed int16 results (16 bytes) and advance dst. | |||
| stmia r0!, {r3-r6} | |||
| @ Loop while the remaining count from the subs above is still positive. | |||
| bgt 1b | |||
| @ Restore the saved VFP and core registers; popping pc returns. | |||
| vpop {d8-d11} | |||
| pop {r4-r8,pc} | |||
| endfunc | |||
| /** | |||
| * ARM VFP optimised int32 to float conversion. | |||
| * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned | |||
| @@ -0,0 +1,78 @@ | |||
| /* | |||
| * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net> | |||
| * | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with Libav; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "config.h" | |||
| #include "libavutil/arm/asm.S" | |||
| /** | |||
| * ARM VFP optimized float to int16 conversion. | |||
| * | |||
| * Converts floats from src to 32-bit integers with vcvt, saturates each | |||
| * value to the signed 16-bit range with ssat, packs pairs into words with | |||
| * pkhbt and stores eight int16 results (four words) per iteration to dst. | |||
| * The loop is software-pipelined: the load and float->int conversion of | |||
| * the next eight inputs are interleaved with the saturate/pack of the | |||
| * current eight. | |||
| * | |||
| * Assume that len is a positive number and is a multiple of 8 (the first | |||
| * eight floats are loaded before len is ever tested), destination | |||
| * buffer is at least 4 bytes aligned (8 bytes alignment is better for | |||
| * performance), little-endian byte order. | |||
| * | |||
| * Register use: r0 = dst (post-incremented by the store), r1 = src | |||
| * (post-incremented by the loads), r2 = number of floats still to process. | |||
| */ | |||
| @ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len) | |||
| function ff_float_to_int16_vfp, export=1 | |||
| @ Save the core registers used as scratch below, plus lr for the return. | |||
| push {r4-r8,lr} | |||
| @ Save d8-d11, which alias s16-s23 and are overwritten by the loads. | |||
| vpush {d8-d11} | |||
| @ Prologue: load the first eight floats and convert them to int32 in s0-s7. | |||
| vldmia r1!, {s16-s23} | |||
| vcvt.s32.f32 s0, s16 | |||
| vcvt.s32.f32 s1, s17 | |||
| vcvt.s32.f32 s2, s18 | |||
| vcvt.s32.f32 s3, s19 | |||
| vcvt.s32.f32 s4, s20 | |||
| vcvt.s32.f32 s5, s21 | |||
| vcvt.s32.f32 s6, s22 | |||
| vcvt.s32.f32 s7, s23 | |||
| 1: | |||
| @ len -= 8. The flags set here drive every gt-conditional instruction in | |||
| @ this iteration and the final bgt; nothing in between uses an s-suffixed | |||
| @ (flag-setting) form. | |||
| subs r2, r2, #8 | |||
| @ Move the eight converted integers from s0-s7 into core registers. | |||
| vmov r3, r4, s0, s1 | |||
| vmov r5, r6, s2, s3 | |||
| vmov r7, r8, s4, s5 | |||
| vmov ip, lr, s6, s7 | |||
| @ If more input remains, start loading the next eight floats now. | |||
| @ (it/itttt cover the following gt-conditional instructions, as needed | |||
| @ when this is assembled as Thumb-2.) | |||
| it gt | |||
| vldmiagt r1!, {s16-s23} | |||
| @ Saturate samples 0-3 to the signed 16-bit range. | |||
| ssat r4, #16, r4 | |||
| ssat r3, #16, r3 | |||
| ssat r6, #16, r6 | |||
| ssat r5, #16, r5 | |||
| @ Pack pairs: low halfword from the first operand, high halfword from the | |||
| @ second shifted left by 16. r4 is reused as a destination once its old | |||
| @ value has been consumed by the first pkhbt. | |||
| pkhbt r3, r3, r4, lsl #16 | |||
| pkhbt r4, r5, r6, lsl #16 | |||
| @ If more input remains, convert the next eight floats into s0-s7 | |||
| @ (already copied out to core registers above, so safe to overwrite). | |||
| itttt gt | |||
| vcvtgt.s32.f32 s0, s16 | |||
| vcvtgt.s32.f32 s1, s17 | |||
| vcvtgt.s32.f32 s2, s18 | |||
| vcvtgt.s32.f32 s3, s19 | |||
| itttt gt | |||
| vcvtgt.s32.f32 s4, s20 | |||
| vcvtgt.s32.f32 s5, s21 | |||
| vcvtgt.s32.f32 s6, s22 | |||
| vcvtgt.s32.f32 s7, s23 | |||
| @ Saturate samples 4-7 to the signed 16-bit range. | |||
| ssat r8, #16, r8 | |||
| ssat r7, #16, r7 | |||
| ssat lr, #16, lr | |||
| ssat ip, #16, ip | |||
| @ Pack samples 4-5 into r5 and 6-7 into r6 (both free again by now). | |||
| pkhbt r5, r7, r8, lsl #16 | |||
| pkhbt r6, ip, lr, lsl #16 | |||
| @ Store the eight packed int16 results (16 bytes) and advance dst. | |||
| stmia r0!, {r3-r6} | |||
| @ Loop while the remaining count from the subs above is still positive. | |||
| bgt 1b | |||
| @ Restore the saved VFP and core registers; popping pc returns. | |||
| vpop {d8-d11} | |||
| pop {r4-r8,pc} | |||
| endfunc | |||