arm: fmtconvert: Split armv6 fmtconvert code off from vfp code

12 years ago · f0389eb777
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -52,11 +52,13 @@ ARMV6-OBJS-$(CONFIG_VP8_DECODER)       += arm/vp8_armv6.o               \
                                          arm/vp8dsp_init_armv6.o       \
                                          arm/vp8dsp_armv6.o

 VFP-OBJS                               += arm/fmtconvert_vfp.o

 VFP-OBJS-$(CONFIG_DCA_DECODER)         += arm/dcadsp_vfp.o              \
                                          arm/synth_filter_vfp.o
 VFP-OBJS-$(CONFIG_FFT)                 += arm/fft_vfp.o
 VFP-OBJS-$(CONFIG_MDCT)                += arm/mdct_vfp.o
 VFP-OBJS-$(HAVE_ARMV6)                 += arm/fmtconvert_vfp.o
 VFP-OBJS-$(HAVE_ARMV6)                 += arm/fmtconvert_vfp_armv6.o

 NEON-OBJS                              += arm/fmtconvert_neon.o

--- a/libavcodec/arm/fmtconvert_init_arm.c
+++ b/libavcodec/arm/fmtconvert_init_arm.c
@@ -43,16 +43,15 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
 {
    int cpu_flags = av_get_cpu_flags();

    if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) {
    if (have_vfp(cpu_flags)) {
        if (!have_vfpv3(cpu_flags)) {
            // These functions don't use anything armv6 specific in themselves,
            // but ff_float_to_int16_vfp which is in the same assembly source
            // file does, thus the whole file requires armv6 to be built.
            c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp;
            c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp;
        }

        c->float_to_int16 = ff_float_to_int16_vfp;
        if (have_armv6(cpu_flags)) {
            c->float_to_int16 = ff_float_to_int16_vfp;
        }
    }

    if (have_neon(cpu_flags)) {
--- a/libavcodec/arm/fmtconvert_vfp.S
+++ b/libavcodec/arm/fmtconvert_vfp.S
@@ -1,5 +1,4 @@
 /*
 * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
 * Copyright (c) 2013 RISC OS Open Ltd <bavison@riscosopen.org>
 *
 * This file is part of Libav.
@@ -22,62 +21,6 @@
 #include "config.h"
 #include "libavutil/arm/asm.S"

 /**
 * ARM VFP optimized float to int16 conversion.
 * Assume that len is a positive number and is multiple of 8, destination
 * buffer is at least 4 bytes aligned (8 bytes alignment is better for
 * performance), little-endian byte sex.
 */
@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
 function ff_float_to_int16_vfp, export=1
        push            {r4-r8,lr}
        vpush           {d8-d11}
        vldmia          r1!, {s16-s23}
        vcvt.s32.f32    s0,  s16
        vcvt.s32.f32    s1,  s17
        vcvt.s32.f32    s2,  s18
        vcvt.s32.f32    s3,  s19
        vcvt.s32.f32    s4,  s20
        vcvt.s32.f32    s5,  s21
        vcvt.s32.f32    s6,  s22
        vcvt.s32.f32    s7,  s23
 1:
        subs            r2,  r2,  #8
        vmov            r3,  r4,  s0, s1
        vmov            r5,  r6,  s2, s3
        vmov            r7,  r8,  s4, s5
        vmov            ip,  lr,  s6, s7
        it              gt
        vldmiagt        r1!, {s16-s23}
        ssat            r4,  #16, r4
        ssat            r3,  #16, r3
        ssat            r6,  #16, r6
        ssat            r5,  #16, r5
        pkhbt           r3,  r3,  r4, lsl #16
        pkhbt           r4,  r5,  r6, lsl #16
        itttt           gt
        vcvtgt.s32.f32  s0,  s16
        vcvtgt.s32.f32  s1,  s17
        vcvtgt.s32.f32  s2,  s18
        vcvtgt.s32.f32  s3,  s19
        itttt           gt
        vcvtgt.s32.f32  s4,  s20
        vcvtgt.s32.f32  s5,  s21
        vcvtgt.s32.f32  s6,  s22
        vcvtgt.s32.f32  s7,  s23
        ssat            r8,  #16, r8
        ssat            r7,  #16, r7
        ssat            lr,  #16, lr
        ssat            ip,  #16, ip
        pkhbt           r5,  r7,  r8, lsl #16
        pkhbt           r6,  ip,  lr, lsl #16
        stmia           r0!, {r3-r6}
        bgt             1b

        vpop            {d8-d11}
        pop             {r4-r8,pc}
 endfunc

 /**
 * ARM VFP optimised int32 to float conversion.
 * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned
--- a/libavcodec/arm/fmtconvert_vfp_armv6.S
+++ b/libavcodec/arm/fmtconvert_vfp_armv6.S
@@ -0,0 +1,78 @@
 /*
 * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 #include "config.h"
 #include "libavutil/arm/asm.S"

 /**
 * ARM VFP optimized float to int16 conversion.
 * Assume that len is a positive number and is multiple of 8, destination
 * buffer is at least 4 bytes aligned (8 bytes alignment is better for
 * performance), little-endian byte sex.
 */
@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
 function ff_float_to_int16_vfp, export=1
        push            {r4-r8,lr}
        vpush           {d8-d11}
        vldmia          r1!, {s16-s23}
        vcvt.s32.f32    s0,  s16
        vcvt.s32.f32    s1,  s17
        vcvt.s32.f32    s2,  s18
        vcvt.s32.f32    s3,  s19
        vcvt.s32.f32    s4,  s20
        vcvt.s32.f32    s5,  s21
        vcvt.s32.f32    s6,  s22
        vcvt.s32.f32    s7,  s23
 1:
        subs            r2,  r2,  #8
        vmov            r3,  r4,  s0, s1
        vmov            r5,  r6,  s2, s3
        vmov            r7,  r8,  s4, s5
        vmov            ip,  lr,  s6, s7
        it              gt
        vldmiagt        r1!, {s16-s23}
        ssat            r4,  #16, r4
        ssat            r3,  #16, r3
        ssat            r6,  #16, r6
        ssat            r5,  #16, r5
        pkhbt           r3,  r3,  r4, lsl #16
        pkhbt           r4,  r5,  r6, lsl #16
        itttt           gt
        vcvtgt.s32.f32  s0,  s16
        vcvtgt.s32.f32  s1,  s17
        vcvtgt.s32.f32  s2,  s18
        vcvtgt.s32.f32  s3,  s19
        itttt           gt
        vcvtgt.s32.f32  s4,  s20
        vcvtgt.s32.f32  s5,  s21
        vcvtgt.s32.f32  s6,  s22
        vcvtgt.s32.f32  s7,  s23
        ssat            r8,  #16, r8
        ssat            r7,  #16, r7
        ssat            lr,  #16, lr
        ssat            ip,  #16, ip
        pkhbt           r5,  r7,  r8, lsl #16
        pkhbt           r6,  ip,  lr, lsl #16
        stmia           r0!, {r3-r6}
        bgt             1b

        vpop            {d8-d11}
        pop             {r4-r8,pc}
 endfunc