/*
 * Copyright (c) 2014 Peter Meerwald <pmeerw@pmeerw.net>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

#include "asm-offsets.h"
/*
 * Generic scalar-product body shared by all ff_resample_one_*_neon
 * functions.  The per-format helper macros LOAD1/LOAD2/LOAD4,
 * MLA1/MLA2/MLA4, MUL4, INIT4 and STORE must be defined before this
 * macro is expanded; \es is log2 of the element size in bytes
 * (2 for 32-bit formats, 1 for 16-bit).
 *
 * AAPCS arguments (struct offsets come from asm-offsets.h):
 *   r0 = ResampleContext *c
 *   r1 = dst base, r2 = dst index (r1 += r2 << \es below)
 *   r3 = src base
 *   [sp, #0] = index (read at [sp, #8] after the two pushed regs)
 * NOTE(review): phase_mask is loaded from PHASE_SHIFT+4, i.e. it is
 * assumed to sit one word after phase_shift -- confirm against the
 * offsets generated into asm-offsets.h.
 */
.macro resample_one     fmt, es=2
function ff_resample_one_\fmt\()_neon, export=1
        push            {r4, r5}
        add             r1, r1, r2, lsl #\es /* r1 = &dst[dst_index] */

        ldr             r2, [r0, #PHASE_SHIFT+4] /* phase_mask */
        ldr             ip, [sp, #8] /* index */
        ldr             r5, [r0, #FILTER_LENGTH]
        and             r2, ip, r2 /* (index & phase_mask) */
        ldr             r4, [r0, #PHASE_SHIFT]
        lsr             r4, ip, r4 /* compute sample_index = index >> phase_shift */
        mul             r2, r2, r5 /* filter offset = (index & phase_mask) * filter_length */

        ldr             ip, [r0, #FILTER_BANK]
        add             r3, r3, r4, lsl #\es /* &src[sample_index] */

        cmp             r5, #8 /* r5 = remaining filter taps */
        add             r0, ip, r2, lsl #\es /* filter = &filter_bank[...] */

        blt             5f /* short filter: take the 1..7-tap tail path */
8:      /* >= 8 taps: 8 per round, in two batches of 4; the first
         * batch uses MUL4 so the accumulators need no pre-zeroing */
        subs            r5, r5, #8
        LOAD4
        MUL4
7:
        LOAD4
        beq             6f /* this batch is the last one */
        cmp             r5, #8
        MLA4
        blt             4f /* 1..7 taps remain after this batch */
        subs            r5, r5, #8
        LOAD4
        MLA4
        b               7b
6:      /* accumulate the final batch and write the output sample */
        MLA4
        STORE
        pop             {r4, r5}
        bx              lr
5:      /* filter_length < 8: accumulators must start from zero */
        INIT4
4:      /* remaining filter_length 1 to 7 */
        cmp             r5, #4
        blt             2f
        subs            r5, r5, #4
        LOAD4
        MLA4
        beq             0f
2:      /* remaining filter_length 1 to 3 */
        cmp             r5, #2
        blt             1f
        subs            r5, r5, #2
        LOAD2
        MLA2
        beq             0f
1:      /* remaining filter_length 1 */
        LOAD1
        MLA1
0:
        STORE
        pop             {r4, r5}
        bx              lr
endfunc

/* drop the per-format helpers so the next format can redefine them */
.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem MLA1
.purgem MLA2
.purgem MLA4
.purgem MUL4
.purgem INIT4
.purgem STORE
.endm


/* float32: 4-byte elements, partial sums kept in q8 (d16/d17) */

/* load one filter tap / src sample; d0 is cleared first so its unused
 * lane adds 0 * d4[0] to the accumulator in MLA1 */
.macro  LOAD1
        veor.32         d0, d0
        vld1.32         {d0[0]}, [r0]! /* load filter */
        vld1.32         {d4[0]}, [r3]! /* load src */
.endm
/* load 2 filter taps and 2 src samples */
.macro  LOAD2
        vld1.32         {d0}, [r0]! /* load filter */
        vld1.32         {d4}, [r3]! /* load src */
.endm
/* load 4 filter taps and 4 src samples */
.macro  LOAD4
        vld1.32         {d0,d1}, [r0]! /* load filter */
        vld1.32         {d4,d5}, [r3]! /* load src */
.endm
.macro  MLA1
        vmla.f32        d16, d0, d4[0] /* acc += tap * sample */
.endm
.macro  MLA2
        vmla.f32        d16, d0, d4
.endm
.macro  MLA4
        vmla.f32        d16, d0, d4
        vmla.f32        d17, d1, d5
.endm
/* first batch: plain multiply, initializing the accumulators */
.macro  MUL4
        vmul.f32        d16, d0, d4
        vmul.f32        d17, d1, d5
.endm
/* zero the accumulator for the short-filter path */
.macro  INIT4
        veor.f32        q8, q8
.endm
/* horizontal-add the 4 partial sums and store one float to dst (r1) */
.macro  STORE
        vpadd.f32       d16, d16, d17
        vpadd.f32       d16, d16, d16
        vst1.32         d16[0], [r1]
.endm

resample_one flt, 2


/* s32: 4-byte elements, widening multiply into 64-bit accumulators q8/q9 */

/* load one filter tap / src sample; clear d0 so its unused lane
 * contributes 0 to the widened accumulate in MLA1 */
.macro  LOAD1
        veor.32         d0, d0
        vld1.32         {d0[0]}, [r0]! /* load filter */
        vld1.32         {d4[0]}, [r3]! /* load src */
.endm
.macro  LOAD2
        vld1.32         {d0}, [r0]! /* load filter */
        vld1.32         {d4}, [r3]! /* load src */
.endm
.macro  LOAD4
        vld1.32         {d0,d1}, [r0]! /* load filter */
        vld1.32         {d4,d5}, [r3]! /* load src */
.endm
.macro  MLA1
        vmlal.s32       q8, d0, d4[0] /* widening multiply-accumulate (s32 -> s64) */
.endm
.macro  MLA2
        vmlal.s32       q8, d0, d4
.endm
.macro  MLA4
        vmlal.s32       q8, d0, d4
        vmlal.s32       q9, d1, d5
.endm
/* first batch: widening multiply, initializing the accumulators */
.macro  MUL4
        vmull.s32       q8, d0, d4
        vmull.s32       q9, d1, d5
.endm
.macro  INIT4
        veor.s64        q8, q8
        veor.s64        q9, q9
.endm
/* sum the 64-bit partials, round/narrow by 30 bits (NOTE(review):
 * presumably Q30-scaled coefficients -- confirm against the filter
 * build code) and store one s32 sample */
.macro  STORE
        vadd.s64        q8, q8, q9
        vadd.s64        d16, d16, d17
        vqrshrn.s64     d16, q8, #30
        vst1.32         d16[0], [r1]
.endm

resample_one s32, 2


/* s16: 2-byte elements (es=1), widening multiply into 32-bit q8 */

/* load one filter tap / src sample; clear d0 so the unused lanes
 * contribute 0 to the widened accumulate in MLA1 */
.macro  LOAD1
        veor.16         d0, d0
        vld1.16         {d0[0]}, [r0]! /* load filter */
        vld1.16         {d4[0]}, [r3]! /* load src */
.endm
/* two s16 values fit in one 32-bit lane; the other lanes are cleared
 * so they add 0 in the full-vector MLA2 */
.macro  LOAD2
        veor.16         d0, d0
        vld1.32         {d0[0]}, [r0]! /* load filter */
        veor.16         d4, d4
        vld1.32         {d4[0]}, [r3]! /* load src */
.endm
/* load 4 taps / samples (one full d register of s16) */
.macro  LOAD4
        vld1.16         {d0}, [r0]! /* load filter */
        vld1.16         {d4}, [r3]! /* load src */
.endm
.macro  MLA1
        vmlal.s16       q8, d0, d4[0] /* widening multiply-accumulate (s16 -> s32) */
.endm
.macro  MLA2
        vmlal.s16       q8, d0, d4
.endm
.macro  MLA4
        vmlal.s16       q8, d0, d4
.endm
/* first batch: widening multiply, initializing the accumulator */
.macro  MUL4
        vmull.s16       q8, d0, d4
.endm
.macro  INIT4
        veor.s32        q8, q8
.endm
/* horizontal-add the partials, round/narrow by 15 bits (NOTE(review):
 * presumably Q15-scaled coefficients -- confirm) and store one s16 */
.macro  STORE
        vpadd.s32       d16, d16, d17
        vpadd.s32       d16, d16, d16
        vqrshrn.s32     d16, q8, #15
        vst1.16         d16[0], [r1]
.endm

resample_one s16, 1


/*
 * Generic body for ff_resample_linear_*_neon: same tap loop as
 * resample_one, but two filter tables are walked in parallel
 * (r0 = filter, r2 = filter + filter_length) so that STORE can
 * linearly interpolate between the two dot products using frac and
 * c->src_incr (kept in r4 for STORE).
 *
 * Arguments as for resample_one, plus frac as the sixth argument:
 * on entry index is at [sp, #0] and frac at [sp, #4]; after the
 * push they are read at [sp, #8] and (inside STORE) [sp, #12].
 */
.macro resample_linear  fmt, es=2
function ff_resample_linear_\fmt\()_neon, export=1
        push            {r4, r5}
        add             r1, r1, r2, lsl #\es /* r1 = &dst[dst_index] */

        ldr             r2, [r0, #PHASE_SHIFT+4] /* phase_mask */
        ldr             ip, [sp, #8] /* index */
        ldr             r5, [r0, #FILTER_LENGTH]
        and             r2, ip, r2 /* (index & phase_mask) */
        ldr             r4, [r0, #PHASE_SHIFT]
        lsr             r4, ip, r4 /* compute sample_index = index >> phase_shift */
        mul             r2, r2, r5 /* filter offset = (index & phase_mask) * filter_length */

        ldr             ip, [r0, #FILTER_BANK]
        add             r3, r3, r4, lsl #\es /* &src[sample_index] */

        cmp             r5, #8 /* r5 = remaining filter taps */
        ldr             r4, [r0, #SRC_INCR] /* r4 reusable now; src_incr for STORE */
        add             r0, ip, r2, lsl #\es /* filter = &filter_bank[...] */
        add             r2, r0, r5, lsl #\es /* filter[... + c->filter_length] */

        blt             5f /* short filter: take the 1..7-tap tail path */
8:      /* >= 8 taps: 8 per round, in two batches of 4; the first
         * batch uses MUL4 so the accumulators need no pre-zeroing */
        subs            r5, r5, #8
        LOAD4
        MUL4
7:
        LOAD4
        beq             6f /* this batch is the last one */
        cmp             r5, #8
        MLA4
        blt             4f /* 1..7 taps remain after this batch */
        subs            r5, r5, #8
        LOAD4
        MLA4
        b               7b
6:      /* accumulate the final batch, interpolate and store */
        MLA4
        STORE
        pop             {r4, r5}
        bx              lr
5:      /* filter_length < 8: accumulators must start from zero */
        INIT4
4:      /* remaining filter_length 1 to 7 */
        cmp             r5, #4
        blt             2f
        subs            r5, r5, #4
        LOAD4
        MLA4
        beq             0f
2:      /* remaining filter_length 1 to 3 */
        cmp             r5, #2
        blt             1f
        subs            r5, r5, #2
        LOAD2
        MLA2
        beq             0f
1:      /* remaining filter_length 1 */
        LOAD1
        MLA1
0:
        STORE
        pop             {r4, r5}
        bx              lr
endfunc

/* drop the per-format helpers so the next format can redefine them */
.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem MLA1
.purgem MLA2
.purgem MLA4
.purgem MUL4
.purgem INIT4
.purgem STORE
.endm


/* float32 linear interpolation: q9 accumulates src * filter ("val"),
 * q8 accumulates src * filter[filter_length] ("v2"); STORE blends
 * the two results with frac / src_incr */

/* load one tap from each filter table plus one src sample; clear the
 * tap registers so their unused lanes accumulate 0 in MLA1 */
.macro  LOAD1
        veor.32         d0, d0
        veor.32         d2, d2
        vld1.32         {d0[0]}, [r0]! /* load filter */
        vld1.32         {d2[0]}, [r2]! /* load second filter */
        vld1.32         {d4[0]}, [r3]! /* load src */
.endm
.macro  LOAD2
        vld1.32         {d0}, [r0]! /* load filter */
        vld1.32         {d2}, [r2]! /* load second filter */
        vld1.32         {d4}, [r3]! /* load src */
.endm
.macro  LOAD4
        vld1.32         {d0,d1}, [r0]! /* load filter */
        vld1.32         {d2,d3}, [r2]! /* load second filter */
        vld1.32         {d4,d5}, [r3]! /* load src */
.endm
.macro  MLA1
        vmla.f32        d18, d0, d4[0] /* val += filter * src */
        vmla.f32        d16, d2, d4[0] /* v2  += filter2 * src */
.endm
.macro  MLA2
        vmla.f32        d18, d0, d4
        vmla.f32        d16, d2, d4
.endm
.macro  MLA4
        vmla.f32        q9, q0, q2
        vmla.f32        q8, q1, q2
.endm
/* first batch: plain multiply, initializing both accumulators */
.macro  MUL4
        vmul.f32        q9, q0, q2
        vmul.f32        q8, q1, q2
.endm
.macro  INIT4
        veor.f32        q9, q9
        veor.f32        q8, q8
.endm
/* result = val + (v2 - val) * frac / c->src_incr, stored as one float */
.macro  STORE
        vldr            s0, [sp, #12] /* frac (6th arg; sp moved by the push) */
        vmov            s1, r4 /* c->src_incr */
        vcvt.f32.s32    d0, d0 /* convert both frac and src_incr to float */

        vsub.f32        q8, q8, q9 /* v2 - val */
        vpadd.f32       d18, d18, d19 /* partial sums of val */
        vpadd.f32       d16, d16, d17 /* partial sums of (v2 - val) */
        vpadd.f32       d2, d18, d18 /* s4 = val */
        vpadd.f32       d1, d16, d16 /* s2 = v2 - val */

        vmul.f32        s2, s2, s0 /* (v2 - val) * frac */
        vdiv.f32        s2, s2, s1 /* / c->src_incr */
        vadd.f32        s4, s4, s2 /* val + interpolation term */

        vstr            s4, [r1]
.endm

resample_linear flt, 2