@@ -31,6 +31,9 @@ OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o
OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o
OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o
OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_arm.o \
arm/hpeldsp_init_arm.o
OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_arm.o
OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_arm.o \
arm/rv40dsp_init_arm.o \
@@ -58,6 +61,9 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \
arm/dsputil_armv6.o \
arm/simple_idct_armv6.o \
ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_armv6.o \
arm/hpeldsp_init_armv6.o
VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
@@ -76,6 +82,9 @@ NEON-OBJS-$(CONFIG_H264PRED) += arm/h264pred_neon.o \
NEON-OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_neon.o \
NEON-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_neon.o \
arm/hpeldsp_init_neon.o
NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o
NEON-OBJS-$(CONFIG_AAC_DECODER) += arm/sbrdsp_neon.o \
@@ -26,590 +26,6 @@
#define pld @
#endif
.macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
mov \Rd0, \Rn0, lsr #(\shift * 8)
mov \Rd1, \Rn1, lsr #(\shift * 8)
mov \Rd2, \Rn2, lsr #(\shift * 8)
mov \Rd3, \Rn3, lsr #(\shift * 8)
orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm
.macro ALIGN_DWORD shift, R0, R1, R2
mov \R0, \R0, lsr #(\shift * 8)
orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
mov \R1, \R1, lsr #(\shift * 8)
orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm
.macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm
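The three ALIGN_* macros above reassemble unaligned data from word-aligned loads: each output word takes the top bytes of one source word and the bottom bytes of the next. A minimal C sketch of the recombination (assuming little-endian byte order, as the lsr/lsl pairing implies; the function name is illustrative, not from the source):

    #include <stdint.h>

    /* C analogue of ALIGN_DWORD_D: rebuild two unaligned 32-bit words from
     * three aligned ones; shift is the byte misalignment, 1..3. */
    static inline void align_dword_d(unsigned shift, uint32_t *d0, uint32_t *d1,
                                     uint32_t s0, uint32_t s1, uint32_t s2)
    {
        *d0 = (s0 >> (shift * 8)) | (s1 << (32 - shift * 8));
        *d1 = (s1 >> (shift * 8)) | (s2 << (32 - shift * 8));
    }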
.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
@ Rmask = 0xFEFEFEFE
@ Rn = destroyed
eor \Rd0, \Rn0, \Rm0
eor \Rd1, \Rn1, \Rm1
orr \Rn0, \Rn0, \Rm0
orr \Rn1, \Rn1, \Rm1
and \Rd0, \Rd0, \Rmask
and \Rd1, \Rd1, \Rmask
sub \Rd0, \Rn0, \Rd0, lsr #1
sub \Rd1, \Rn1, \Rd1, lsr #1
.endm
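RND_AVG32 averages eight packed bytes at once without unpacking them. Since a + b = 2*(a | b) - (a ^ b), the per-byte rounding average reduces to the expression below (a C reference sketch, one 32-bit word shown):

    #include <stdint.h>

    /* (a + b + 1) >> 1 on each byte: the 0xFEFEFEFE mask drops each byte's
     * low xor bit before the shift so bytes cannot borrow into each other. */
    static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
    {
        return (a | b) - (((a ^ b) & 0xFEFEFEFEu) >> 1);
    }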
.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
@ Rmask = 0xFEFEFEFE
@ Rn = destroyed
eor \Rd0, \Rn0, \Rm0
eor \Rd1, \Rn1, \Rm1
and \Rn0, \Rn0, \Rm0
and \Rn1, \Rn1, \Rm1
and \Rd0, \Rd0, \Rmask
and \Rd1, \Rd1, \Rmask
add \Rd0, \Rn0, \Rd0, lsr #1
add \Rd1, \Rn1, \Rd1, lsr #1
.endm
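NO_RND_AVG32 is the truncating counterpart, built on a + b = 2*(a & b) + (a ^ b); in C:

    #include <stdint.h>

    /* (a + b) >> 1 on each byte, rounding toward zero. */
    static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
    {
        return (a & b) + (((a ^ b) & 0xFEFEFEFEu) >> 1);
    }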
.macro JMP_ALIGN tmp, reg
ands \tmp, \reg, #3
bic \reg, \reg, #3
beq 1f
subs \tmp, \tmp, #1
beq 2f
subs \tmp, \tmp, #1
beq 3f
b 4f
.endm
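JMP_ALIGN peels the low two bits off the source pointer and branches to one of four alignment-specialized loops (local labels 1 to 4), leaving the pointer word-aligned for the ldm loads. A hedged C analogue (helper name illustrative):

    #include <stdint.h>

    /* Returns 0..3, selecting the loop that shifts by that many bytes,
     * and rounds *src down to a 32-bit boundary. */
    static inline unsigned jmp_align(const uint8_t **src)
    {
        unsigned shift = (uintptr_t)*src & 3;                       /* ands */
        *src = (const uint8_t *)((uintptr_t)*src & ~(uintptr_t)3);  /* bic  */
        return shift;
    }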
@ ----------------------------------------------------------------
.align 5
function ff_put_pixels16_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r11, lr}
JMP_ALIGN r5, r1
1:
ldm r1, {r4-r7}
add r1, r1, r2
stm r0, {r4-r7}
pld [r1]
subs r3, r3, #1
add r0, r0, r2
bne 1b
pop {r4-r11, pc}
.align 5
2:
ldm r1, {r4-r8}
add r1, r1, r2
ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
pld [r1]
subs r3, r3, #1
stm r0, {r9-r12}
add r0, r0, r2
bne 2b
pop {r4-r11, pc}
.align 5
3:
ldm r1, {r4-r8}
add r1, r1, r2
ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
pld [r1]
subs r3, r3, #1
stm r0, {r9-r12}
add r0, r0, r2
bne 3b
pop {r4-r11, pc}
.align 5
4:
ldm r1, {r4-r8}
add r1, r1, r2
ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
pld [r1]
subs r3, r3, #1
stm r0, {r9-r12}
add r0, r0, r2
bne 4b
pop {r4-r11,pc}
endfunc
@ ----------------------------------------------------------------
.align 5
function ff_put_pixels8_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r5,lr}
JMP_ALIGN r5, r1
1:
ldm r1, {r4-r5}
add r1, r1, r2
subs r3, r3, #1
pld [r1]
stm r0, {r4-r5}
add r0, r0, r2
bne 1b
pop {r4-r5,pc}
.align 5
2:
ldm r1, {r4-r5, r12}
add r1, r1, r2
ALIGN_DWORD 1, r4, r5, r12
pld [r1]
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 2b
pop {r4-r5,pc}
.align 5
3:
ldm r1, {r4-r5, r12}
add r1, r1, r2
ALIGN_DWORD 2, r4, r5, r12
pld [r1]
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 3b
pop {r4-r5,pc}
.align 5
4:
ldm r1, {r4-r5, r12}
add r1, r1, r2
ALIGN_DWORD 3, r4, r5, r12
pld [r1]
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 4b
pop {r4-r5,pc}
endfunc
@ ----------------------------------------------------------------
.align 5
function ff_put_pixels8_x2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r10,lr}
ldr r12, =0xfefefefe
JMP_ALIGN r5, r1
1:
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
pld [r1]
RND_AVG32 r8, r9, r4, r5, r6, r7, r12
subs r3, r3, #1
stm r0, {r8-r9}
add r0, r0, r2
bne 1b
pop {r4-r10,pc}
.align 5
2:
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
pld [r1]
RND_AVG32 r4, r5, r6, r7, r8, r9, r12
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 2b
pop {r4-r10,pc}
.align 5
3:
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
pld [r1]
RND_AVG32 r4, r5, r6, r7, r8, r9, r12
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 3b
pop {r4-r10,pc}
.align 5
4:
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
pld [r1]
RND_AVG32 r8, r9, r6, r7, r5, r10, r12
subs r3, r3, #1
stm r0, {r8-r9}
add r0, r0, r2
bne 4b
pop {r4-r10,pc}
endfunc
.align 5
function ff_put_no_rnd_pixels8_x2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r10,lr}
ldr r12, =0xfefefefe
JMP_ALIGN r5, r1
1:
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
pld [r1]
NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
subs r3, r3, #1
stm r0, {r8-r9}
add r0, r0, r2
bne 1b
pop {r4-r10,pc}
.align 5
2:
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
pld [r1]
NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 2b
pop {r4-r10,pc}
.align 5
3:
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
pld [r1]
NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
subs r3, r3, #1
stm r0, {r4-r5}
add r0, r0, r2
bne 3b
pop {r4-r10,pc}
.align 5
4:
ldm r1, {r4-r5, r10}
add r1, r1, r2
ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
pld [r1]
NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
subs r3, r3, #1
stm r0, {r8-r9}
add r0, r0, r2
bne 4b
pop {r4-r10,pc}
endfunc
@ ----------------------------------------------------------------
.align 5
function ff_put_pixels8_y2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r11,lr}
mov r3, r3, lsr #1
ldr r12, =0xfefefefe
JMP_ALIGN r5, r1
1:
ldm r1, {r4-r5}
add r1, r1, r2
6: ldm r1, {r6-r7}
add r1, r1, r2
pld [r1]
RND_AVG32 r8, r9, r4, r5, r6, r7, r12
ldm r1, {r4-r5}
add r1, r1, r2
stm r0, {r8-r9}
add r0, r0, r2
pld [r1]
RND_AVG32 r8, r9, r6, r7, r4, r5, r12
subs r3, r3, #1
stm r0, {r8-r9}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
.align 5
2:
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 1, r4, r5, r6
6: ldm r1, {r7-r9}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 1, r7, r8, r9
RND_AVG32 r10, r11, r4, r5, r7, r8, r12
stm r0, {r10-r11}
add r0, r0, r2
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 1, r4, r5, r6
subs r3, r3, #1
RND_AVG32 r10, r11, r7, r8, r4, r5, r12
stm r0, {r10-r11}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
.align 5
3:
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 2, r4, r5, r6
6: ldm r1, {r7-r9}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 2, r7, r8, r9
RND_AVG32 r10, r11, r4, r5, r7, r8, r12
stm r0, {r10-r11}
add r0, r0, r2
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 2, r4, r5, r6
subs r3, r3, #1
RND_AVG32 r10, r11, r7, r8, r4, r5, r12
stm r0, {r10-r11}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
.align 5
4:
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 3, r4, r5, r6
6: ldm r1, {r7-r9}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 3, r7, r8, r9
RND_AVG32 r10, r11, r4, r5, r7, r8, r12
stm r0, {r10-r11}
add r0, r0, r2
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 3, r4, r5, r6
subs r3, r3, #1
RND_AVG32 r10, r11, r7, r8, r4, r5, r12
stm r0, {r10-r11}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
endfunc
.align 5
function ff_put_no_rnd_pixels8_y2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r11,lr}
mov r3, r3, lsr #1
ldr r12, =0xfefefefe
JMP_ALIGN r5, r1
1:
ldm r1, {r4-r5}
add r1, r1, r2
6: ldm r1, {r6-r7}
add r1, r1, r2
pld [r1]
NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
ldm r1, {r4-r5}
add r1, r1, r2
stm r0, {r8-r9}
add r0, r0, r2
pld [r1]
NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
subs r3, r3, #1
stm r0, {r8-r9}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
.align 5
2:
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 1, r4, r5, r6
6: ldm r1, {r7-r9}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 1, r7, r8, r9
NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
stm r0, {r10-r11}
add r0, r0, r2
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 1, r4, r5, r6
subs r3, r3, #1
NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
stm r0, {r10-r11}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
.align 5
3:
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 2, r4, r5, r6
6: ldm r1, {r7-r9}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 2, r7, r8, r9
NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
stm r0, {r10-r11}
add r0, r0, r2
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 2, r4, r5, r6
subs r3, r3, #1
NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
stm r0, {r10-r11}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
.align 5
4:
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 3, r4, r5, r6
6: ldm r1, {r7-r9}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 3, r7, r8, r9
NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
stm r0, {r10-r11}
add r0, r0, r2
ldm r1, {r4-r6}
add r1, r1, r2
pld [r1]
ALIGN_DWORD 3, r4, r5, r6
subs r3, r3, #1
NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
stm r0, {r10-r11}
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
endfunc
.ltorg
@ ----------------------------------------------------------------
.macro RND_XY2_IT align, rnd
@ l1 = (a & 0x03030303) + (b & 0x03030303) (+ 0x02020202 if rounding)
@ h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
.if \align == 0
ldm r1, {r6-r8}
.elseif \align == 3
ldm r1, {r5-r7}
.else
ldm r1, {r8-r10}
.endif
add r1, r1, r2
pld [r1]
.if \align == 0
ALIGN_DWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
ALIGN_DWORD_D 1, r4, r5, r8, r9, r10
ALIGN_DWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
ALIGN_DWORD_D 2, r4, r5, r8, r9, r10
ALIGN_DWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
ALIGN_DWORD_D 3, r4, r5, r5, r6, r7
.endif
ldr r14, =0x03030303
tst r3, #1
and r8, r4, r14
and r9, r5, r14
and r10, r6, r14
and r11, r7, r14
it eq
andeq r14, r14, r14, \rnd #1
add r8, r8, r10
add r9, r9, r11
ldr r12, =0xfcfcfcfc >> 2
itt eq
addeq r8, r8, r14
addeq r9, r9, r14
and r4, r12, r4, lsr #2
and r5, r12, r5, lsr #2
and r6, r12, r6, lsr #2
and r7, r12, r7, lsr #2
add r10, r4, r6
add r11, r5, r7
subs r3, r3, #1
.endm
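RND_XY2_IT prepares one source row for the xy2 (diagonal half-pel) filter by splitting every byte into its low two bits (l) and its high six bits already shifted down (h), so four pixels can later be summed per byte without overflow; the rounding constant (0x02 per byte for put, 0x01 for no_rnd, selected by the lsl/lsr parameter) is folded in on every other row. The per-byte result the whole construction computes, as a C reference:

    #include <stdint.h>

    /* Four-neighbour half-pel average; rnd selects proper rounding. */
    static inline uint8_t avg_xy2(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
                                  int rnd)
    {
        return (a + b + c + d + (rnd ? 2 : 1)) >> 2;
    }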
.macro RND_XY2_EXPAND align, rnd
RND_XY2_IT \align, \rnd
6: push {r8-r11}
RND_XY2_IT \align, \rnd
pop {r4-r7}
add r4, r4, r8
add r5, r5, r9
ldr r14, =0x0f0f0f0f
add r6, r6, r10
add r7, r7, r11
and r4, r14, r4, lsr #2
and r5, r14, r5, lsr #2
add r4, r4, r6
add r5, r5, r7
stm r0, {r4-r5}
add r0, r0, r2
bge 6b
pop {r4-r11,pc}
.endm
.align 5
function ff_put_pixels8_xy2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r11,lr} @ R14 is also called LR
JMP_ALIGN r5, r1
1: RND_XY2_EXPAND 0, lsl
.align 5
2: RND_XY2_EXPAND 1, lsl
.align 5
3: RND_XY2_EXPAND 2, lsl
.align 5
4: RND_XY2_EXPAND 3, lsl
endfunc
.align 5
function ff_put_no_rnd_pixels8_xy2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
pld [r1]
push {r4-r11,lr}
JMP_ALIGN r5, r1
1: RND_XY2_EXPAND 0, lsr
.align 5
2: RND_XY2_EXPAND 1, lsr
.align 5
3: RND_XY2_EXPAND 2, lsr
.align 5
4: RND_XY2_EXPAND 3, lsr
endfunc
.align 5
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
function ff_add_pixels_clamped_arm, export=1
@@ -20,244 +20,6 @@
#include "libavutil/arm/asm.S"
.macro call_2x_pixels type, subp
function ff_\type\()_pixels16\subp\()_armv6, export=1
push {r0-r3, lr}
bl ff_\type\()_pixels8\subp\()_armv6
pop {r0-r3, lr}
add r0, r0, #8
add r1, r1, #8
b ff_\type\()_pixels8\subp\()_armv6
endfunc
.endm
call_2x_pixels avg
call_2x_pixels put, _x2
call_2x_pixels put, _y2
call_2x_pixels put, _x2_no_rnd
call_2x_pixels put, _y2_no_rnd
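call_2x_pixels derives every 16-pixel-wide variant from the 8-pixel one by running it twice, 8 bytes apart in both source and destination; the CALL_2X_PIXELS macro in the C init files further down generates the same kind of wrapper. A C equivalent of one expansion (the wrapper name is illustrative):

    #include <stdint.h>
    #include <stddef.h>

    void ff_put_pixels8_armv6(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);

    /* What "call_2x_pixels put" expands to, written out in C. */
    static void put_pixels16_via_8(uint8_t *block, const uint8_t *pixels,
                                   ptrdiff_t line_size, int h)
    {
        ff_put_pixels8_armv6(block,     pixels,     line_size, h);
        ff_put_pixels8_armv6(block + 8, pixels + 8, line_size, h);
    }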
function ff_put_pixels16_armv6, export=1
push {r4-r11}
1:
ldr r5, [r1, #4]
ldr r6, [r1, #8]
ldr r7, [r1, #12]
ldr_post r4, r1, r2
strd r6, r7, [r0, #8]
ldr r9, [r1, #4]
strd_post r4, r5, r0, r2
ldr r10, [r1, #8]
ldr r11, [r1, #12]
ldr_post r8, r1, r2
strd r10, r11, [r0, #8]
subs r3, r3, #2
strd_post r8, r9, r0, r2
bne 1b
pop {r4-r11}
bx lr
endfunc
function ff_put_pixels8_armv6, export=1
push {r4-r7}
1:
ldr r5, [r1, #4]
ldr_post r4, r1, r2
ldr r7, [r1, #4]
strd_post r4, r5, r0, r2
ldr_post r6, r1, r2
subs r3, r3, #2
strd_post r6, r7, r0, r2
bne 1b
pop {r4-r7}
bx lr
endfunc
function ff_put_pixels8_x2_armv6, export=1
push {r4-r11, lr}
mov r12, #1
orr r12, r12, r12, lsl #8
orr r12, r12, r12, lsl #16
1:
ldr r4, [r1]
subs r3, r3, #2
ldr r5, [r1, #4]
ldr r7, [r1, #5]
lsr r6, r4, #8
ldr_pre r8, r1, r2
orr r6, r6, r5, lsl #24
ldr r9, [r1, #4]
ldr r11, [r1, #5]
lsr r10, r8, #8
add r1, r1, r2
orr r10, r10, r9, lsl #24
eor r14, r4, r6
uhadd8 r4, r4, r6
eor r6, r5, r7
uhadd8 r5, r5, r7
and r14, r14, r12
and r6, r6, r12
uadd8 r4, r4, r14
eor r14, r8, r10
uadd8 r5, r5, r6
eor r6, r9, r11
uhadd8 r8, r8, r10
and r14, r14, r12
uhadd8 r9, r9, r11
and r6, r6, r12
uadd8 r8, r8, r14
strd_post r4, r5, r0, r2
uadd8 r9, r9, r6
strd_post r8, r9, r0, r2
bne 1b
pop {r4-r11, pc}
endfunc
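The x2 kernel above rounds in two steps: uhadd8 produces the truncated per-byte average, and uadd8 adds back (a ^ b) & 0x01010101, the dropped low bit of each byte sum, which is exactly the rounding correction. Per byte, in C:

    #include <stdint.h>

    /* ((a + b) >> 1) + ((a ^ b) & 1) == (a + b + 1) >> 1 */
    static inline uint8_t uhadd8_rounded(uint8_t a, uint8_t b)
    {
        return ((a + b) >> 1) + ((a ^ b) & 1);
    }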
function ff_put_pixels8_y2_armv6, export=1
push {r4-r11}
mov r12, #1
orr r12, r12, r12, lsl #8
orr r12, r12, r12, lsl #16
ldr r4, [r1]
ldr r5, [r1, #4]
ldr_pre r6, r1, r2
ldr r7, [r1, #4]
1:
subs r3, r3, #2
uhadd8 r8, r4, r6
eor r10, r4, r6
uhadd8 r9, r5, r7
eor r11, r5, r7
and r10, r10, r12
ldr_pre r4, r1, r2
uadd8 r8, r8, r10
and r11, r11, r12
uadd8 r9, r9, r11
ldr r5, [r1, #4]
uhadd8 r10, r4, r6
eor r6, r4, r6
uhadd8 r11, r5, r7
and r6, r6, r12
eor r7, r5, r7
uadd8 r10, r10, r6
and r7, r7, r12
ldr_pre r6, r1, r2
uadd8 r11, r11, r7
strd_post r8, r9, r0, r2
ldr r7, [r1, #4]
strd_post r10, r11, r0, r2
bne 1b
pop {r4-r11}
bx lr
endfunc
function ff_put_pixels8_x2_no_rnd_armv6, export=1
push {r4-r9, lr}
1:
subs r3, r3, #2
ldr r4, [r1]
ldr r5, [r1, #4]
ldr r7, [r1, #5]
ldr_pre r8, r1, r2
ldr r9, [r1, #4]
ldr r14, [r1, #5]
add r1, r1, r2
lsr r6, r4, #8
orr r6, r6, r5, lsl #24
lsr r12, r8, #8
orr r12, r12, r9, lsl #24
uhadd8 r4, r4, r6
uhadd8 r5, r5, r7
uhadd8 r8, r8, r12
uhadd8 r9, r9, r14
stm r0, {r4,r5}
add r0, r0, r2
stm r0, {r8,r9}
add r0, r0, r2
bne 1b
pop {r4-r9, pc}
endfunc
function ff_put_pixels8_y2_no_rnd_armv6, export=1
push {r4-r9, lr}
ldr r4, [r1]
ldr r5, [r1, #4]
ldr_pre r6, r1, r2
ldr r7, [r1, #4]
1:
subs r3, r3, #2
uhadd8 r8, r4, r6
ldr_pre r4, r1, r2
uhadd8 r9, r5, r7
ldr r5, [r1, #4]
uhadd8 r12, r4, r6
ldr_pre r6, r1, r2
uhadd8 r14, r5, r7
ldr r7, [r1, #4]
stm r0, {r8,r9}
add r0, r0, r2
stm r0, {r12,r14}
add r0, r0, r2
bne 1b
pop {r4-r9, pc}
endfunc
function ff_avg_pixels8_armv6, export=1
pld [r1, r2]
push {r4-r10, lr}
mov lr, #1
orr lr, lr, lr, lsl #8
orr lr, lr, lr, lsl #16
ldrd r4, r5, [r0]
ldr r10, [r1, #4]
ldr_post r9, r1, r2
subs r3, r3, #2
1:
pld [r1, r2]
eor r8, r4, r9
uhadd8 r4, r4, r9
eor r12, r5, r10
ldrd_reg r6, r7, r0, r2
uhadd8 r5, r5, r10
and r8, r8, lr
ldr r10, [r1, #4]
and r12, r12, lr
uadd8 r4, r4, r8
ldr_post r9, r1, r2
eor r8, r6, r9
uadd8 r5, r5, r12
pld [r1, r2, lsl #1]
eor r12, r7, r10
uhadd8 r6, r6, r9
strd_post r4, r5, r0, r2
uhadd8 r7, r7, r10
beq 2f
and r8, r8, lr
ldrd_reg r4, r5, r0, r2
uadd8 r6, r6, r8
ldr r10, [r1, #4]
and r12, r12, lr
subs r3, r3, #2
uadd8 r7, r7, r12
ldr_post r9, r1, r2
strd_post r6, r7, r0, r2
b 1b
2:
and r8, r8, lr
and r12, r12, lr
uadd8 r6, r6, r8
uadd8 r7, r7, r12
strd_post r6, r7, r0, r2
pop {r4-r10, pc}
endfunc
function ff_add_pixels_clamped_armv6, export=1
push {r4-r8,lr}
mov r3, #8
@@ -30,24 +30,6 @@ void ff_simple_idct_arm(int16_t *data);
static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
void ff_put_pixels16_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
CALL_2X_PIXELS(ff_put_pixels16_x2_arm, ff_put_pixels8_x2_arm, 8)
CALL_2X_PIXELS(ff_put_pixels16_y2_arm, ff_put_pixels8_y2_arm, 8)
CALL_2X_PIXELS(ff_put_pixels16_xy2_arm, ff_put_pixels8_xy2_arm, 8)
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_x2_arm, ff_put_no_rnd_pixels8_x2_arm, 8)
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_y2_arm, ff_put_no_rnd_pixels8_y2_arm, 8)
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_xy2_arm, ff_put_no_rnd_pixels8_xy2_arm,8)
void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest,
int line_size);
@@ -76,7 +58,6 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block)
av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
int cpu_flags = av_get_cpu_flags();
ff_put_pixels_clamped = c->put_pixels_clamped;
@@ -99,26 +80,6 @@ av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx)
c->add_pixels_clamped = ff_add_pixels_clamped_arm;
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = ff_put_pixels16_arm;
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_arm;
c->put_pixels_tab[0][2] = ff_put_pixels16_y2_arm;
c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_arm;
c->put_pixels_tab[1][0] = ff_put_pixels8_arm;
c->put_pixels_tab[1][1] = ff_put_pixels8_x2_arm;
c->put_pixels_tab[1][2] = ff_put_pixels8_y2_arm;
c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_arm;
c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_arm;
c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_arm;
c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_arm;
c->put_no_rnd_pixels_tab[0][3] = ff_put_no_rnd_pixels16_xy2_arm;
c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_arm;
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_arm;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm;
c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm;
}
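These tables are indexed as put_pixels_tab[size][dxy]: row 0 holds the 16-pixel-wide functions, row 1 the 8-pixel ones, and dxy packs the half-pel offsets (0 copy, 1 x2, 2 y2, 3 xy2). A hedged usage sketch of how a caller would pick an entry (the helper and variable names are illustrative, not from the source):

    /* Hypothetical helper: run a put primitive for a half-pel motion
     * vector (hx, hy); c is the DSPContext configured above. */
    static void put_mc(DSPContext *c, int is_8wide, int hx, int hy,
                       uint8_t *dst, const uint8_t *src,
                       ptrdiff_t line_size, int h)
    {
        int dxy = (hx & 1) | ((hy & 1) << 1);  /* 0 copy, 1 x2, 2 y2, 3 xy2 */
        c->put_pixels_tab[is_8wide ? 1 : 0][dxy](dst, src, line_size, h);
    }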
if (have_armv5te(cpu_flags)) ff_dsputil_init_armv5te(c, avctx);
if (have_armv6(cpu_flags)) ff_dsputil_init_armv6(c, avctx);
if (have_neon(cpu_flags)) ff_dsputil_init_neon(c, avctx);
@@ -27,24 +27,6 @@ void ff_simple_idct_armv6(int16_t *data);
void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data);
void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data);
void ff_put_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_add_pixels_clamped_armv6(const int16_t *block,
uint8_t *restrict pixels,
int line_size);
@@ -82,29 +64,6 @@ av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx)
c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
}
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = ff_put_pixels16_armv6;
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6;
c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6;
/* c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_armv6; */
c->put_pixels_tab[1][0] = ff_put_pixels8_armv6;
c->put_pixels_tab[1][1] = ff_put_pixels8_x2_armv6;
c->put_pixels_tab[1][2] = ff_put_pixels8_y2_armv6;
/* c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_armv6; */
c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_armv6;
c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_armv6;
c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_armv6;
/* c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_armv6; */
c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_armv6;
c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_armv6;
c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_armv6;
/* c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_armv6; */
c->avg_pixels_tab[0][0] = ff_avg_pixels16_armv6;
c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6;
}
if (!high_bit_depth)
c->get_pixels = ff_get_pixels_armv6;
c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
@@ -32,33 +32,6 @@ void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);
void ff_clear_block_neon(int16_t *block);
void ff_clear_blocks_neon(int16_t *blocks);
void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
@@ -92,38 +65,6 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
if (!high_bit_depth) {
c->clear_block = ff_clear_block_neon;
c->clear_blocks = ff_clear_blocks_neon;
c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;
c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_neon;
c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_neon;
c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_neon;
c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_neon;
c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_neon;
c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_neon;
c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_neon;
c->avg_no_rnd_pixels_tab[1] = ff_avg_pixels16_x2_no_rnd_neon;
c->avg_no_rnd_pixels_tab[2] = ff_avg_pixels16_y2_no_rnd_neon;
c->avg_no_rnd_pixels_tab[3] = ff_avg_pixels16_xy2_no_rnd_neon;
}
c->add_pixels_clamped = ff_add_pixels_clamped_neon;
@@ -37,394 +37,6 @@ function ff_clear_blocks_neon, export=1
bx lr
endfunc
.macro pixels16 rnd=1, avg=0
.if \avg
mov r12, r0
.endif
1: vld1.8 {q0}, [r1], r2
vld1.8 {q1}, [r1], r2
vld1.8 {q2}, [r1], r2
pld [r1, r2, lsl #2]
vld1.8 {q3}, [r1], r2
pld [r1]
pld [r1, r2]
pld [r1, r2, lsl #1]
.if \avg
vld1.8 {q8}, [r12,:128], r2
vrhadd.u8 q0, q0, q8
vld1.8 {q9}, [r12,:128], r2
vrhadd.u8 q1, q1, q9
vld1.8 {q10}, [r12,:128], r2
vrhadd.u8 q2, q2, q10
vld1.8 {q11}, [r12,:128], r2
vrhadd.u8 q3, q3, q11
.endif
subs r3, r3, #4
vst1.64 {q0}, [r0,:128], r2
vst1.64 {q1}, [r0,:128], r2
vst1.64 {q2}, [r0,:128], r2
vst1.64 {q3}, [r0,:128], r2
bne 1b
bx lr
.endm
.macro pixels16_x2 rnd=1, avg=0
1: vld1.8 {d0-d2}, [r1], r2
vld1.8 {d4-d6}, [r1], r2
pld [r1]
pld [r1, r2]
subs r3, r3, #2
vext.8 q1, q0, q1, #1
avg q0, q0, q1
vext.8 q3, q2, q3, #1
avg q2, q2, q3
.if \avg
vld1.8 {q1}, [r0,:128], r2
vld1.8 {q3}, [r0,:128]
vrhadd.u8 q0, q0, q1
vrhadd.u8 q2, q2, q3
sub r0, r0, r2
.endif
vst1.8 {q0}, [r0,:128], r2
vst1.8 {q2}, [r0,:128], r2
bne 1b
bx lr
.endm
.macro pixels16_y2 rnd=1, avg=0
sub r3, r3, #2
vld1.8 {q0}, [r1], r2
vld1.8 {q1}, [r1], r2
1: subs r3, r3, #2
avg q2, q0, q1
vld1.8 {q0}, [r1], r2
avg q3, q0, q1
vld1.8 {q1}, [r1], r2
pld [r1]
pld [r1, r2]
.if \avg
vld1.8 {q8}, [r0,:128], r2
vld1.8 {q9}, [r0,:128]
vrhadd.u8 q2, q2, q8
vrhadd.u8 q3, q3, q9
sub r0, r0, r2
.endif
vst1.8 {q2}, [r0,:128], r2
vst1.8 {q3}, [r0,:128], r2
bne 1b
avg q2, q0, q1
vld1.8 {q0}, [r1], r2
avg q3, q0, q1
.if \avg
vld1.8 {q8}, [r0,:128], r2
vld1.8 {q9}, [r0,:128]
vrhadd.u8 q2, q2, q8
vrhadd.u8 q3, q3, q9
sub r0, r0, r2
.endif
vst1.8 {q2}, [r0,:128], r2
vst1.8 {q3}, [r0,:128], r2
bx lr
.endm
.macro pixels16_xy2 rnd=1, avg=0
sub r3, r3, #2
vld1.8 {d0-d2}, [r1], r2
vld1.8 {d4-d6}, [r1], r2
NRND vmov.i16 q13, #1
pld [r1]
pld [r1, r2]
vext.8 q1, q0, q1, #1
vext.8 q3, q2, q3, #1
vaddl.u8 q8, d0, d2
vaddl.u8 q10, d1, d3
vaddl.u8 q9, d4, d6
vaddl.u8 q11, d5, d7
1: subs r3, r3, #2
vld1.8 {d0-d2}, [r1], r2
vadd.u16 q12, q8, q9
pld [r1]
NRND vadd.u16 q12, q12, q13
vext.8 q15, q0, q1, #1
vadd.u16 q1 , q10, q11
shrn d28, q12, #2
NRND vadd.u16 q1, q1, q13
shrn d29, q1, #2
.if \avg
vld1.8 {q8}, [r0,:128]
vrhadd.u8 q14, q14, q8
.endif
vaddl.u8 q8, d0, d30
vld1.8 {d2-d4}, [r1], r2
vaddl.u8 q10, d1, d31
vst1.8 {q14}, [r0,:128], r2
vadd.u16 q12, q8, q9
pld [r1, r2]
NRND vadd.u16 q12, q12, q13
vext.8 q2, q1, q2, #1
vadd.u16 q0, q10, q11
shrn d30, q12, #2
NRND vadd.u16 q0, q0, q13
shrn d31, q0, #2
.if \avg
vld1.8 {q9}, [r0,:128]
vrhadd.u8 q15, q15, q9
.endif
vaddl.u8 q9, d2, d4
vaddl.u8 q11, d3, d5
vst1.8 {q15}, [r0,:128], r2
bgt 1b
vld1.8 {d0-d2}, [r1], r2
vadd.u16 q12, q8, q9
NRND vadd.u16 q12, q12, q13
vext.8 q15, q0, q1, #1
vadd.u16 q1 , q10, q11
shrn d28, q12, #2
NRND vadd.u16 q1, q1, q13
shrn d29, q1, #2
.if \avg
vld1.8 {q8}, [r0,:128]
vrhadd.u8 q14, q14, q8
.endif
vaddl.u8 q8, d0, d30
vaddl.u8 q10, d1, d31
vst1.8 {q14}, [r0,:128], r2
vadd.u16 q12, q8, q9
NRND vadd.u16 q12, q12, q13
vadd.u16 q0, q10, q11
shrn d30, q12, #2
NRND vadd.u16 q0, q0, q13
shrn d31, q0, #2
.if \avg
vld1.8 {q9}, [r0,:128]
vrhadd.u8 q15, q15, q9
.endif
vst1.8 {q15}, [r0,:128], r2
bx lr
.endm
.macro pixels8 rnd=1, avg=0
1: vld1.8 {d0}, [r1], r2
vld1.8 {d1}, [r1], r2
vld1.8 {d2}, [r1], r2
pld [r1, r2, lsl #2]
vld1.8 {d3}, [r1], r2
pld [r1]
pld [r1, r2]
pld [r1, r2, lsl #1]
.if \avg
vld1.8 {d4}, [r0,:64], r2
vrhadd.u8 d0, d0, d4
vld1.8 {d5}, [r0,:64], r2
vrhadd.u8 d1, d1, d5
vld1.8 {d6}, [r0,:64], r2
vrhadd.u8 d2, d2, d6
vld1.8 {d7}, [r0,:64], r2
vrhadd.u8 d3, d3, d7
sub r0, r0, r2, lsl #2
.endif
subs r3, r3, #4
vst1.8 {d0}, [r0,:64], r2
vst1.8 {d1}, [r0,:64], r2
vst1.8 {d2}, [r0,:64], r2
vst1.8 {d3}, [r0,:64], r2
bne 1b
bx lr
.endm
.macro pixels8_x2 rnd=1, avg=0
1: vld1.8 {q0}, [r1], r2
vext.8 d1, d0, d1, #1
vld1.8 {q1}, [r1], r2
vext.8 d3, d2, d3, #1
pld [r1]
pld [r1, r2]
subs r3, r3, #2
vswp d1, d2
avg q0, q0, q1
.if \avg
vld1.8 {d4}, [r0,:64], r2
vld1.8 {d5}, [r0,:64]
vrhadd.u8 q0, q0, q2
sub r0, r0, r2
.endif
vst1.8 {d0}, [r0,:64], r2
vst1.8 {d1}, [r0,:64], r2
bne 1b
bx lr
.endm
.macro pixels8_y2 rnd=1, avg=0
sub r3, r3, #2
vld1.8 {d0}, [r1], r2
vld1.8 {d1}, [r1], r2
1: subs r3, r3, #2
avg d4, d0, d1
vld1.8 {d0}, [r1], r2
avg d5, d0, d1
vld1.8 {d1}, [r1], r2
pld [r1]
pld [r1, r2]
.if \avg
vld1.8 {d2}, [r0,:64], r2
vld1.8 {d3}, [r0,:64]
vrhadd.u8 q2, q2, q1
sub r0, r0, r2
.endif
vst1.8 {d4}, [r0,:64], r2
vst1.8 {d5}, [r0,:64], r2
bne 1b
avg d4, d0, d1
vld1.8 {d0}, [r1], r2
avg d5, d0, d1
.if \avg
vld1.8 {d2}, [r0,:64], r2
vld1.8 {d3}, [r0,:64]
vrhadd.u8 q2, q2, q1
sub r0, r0, r2
.endif
vst1.8 {d4}, [r0,:64], r2
vst1.8 {d5}, [r0,:64], r2
bx lr
.endm
.macro pixels8_xy2 rnd=1, avg=0
sub r3, r3, #2
vld1.8 {q0}, [r1], r2
vld1.8 {q1}, [r1], r2
NRND vmov.i16 q11, #1
pld [r1]
pld [r1, r2]
vext.8 d4, d0, d1, #1
vext.8 d6, d2, d3, #1
vaddl.u8 q8, d0, d4
vaddl.u8 q9, d2, d6
1: subs r3, r3, #2
vld1.8 {q0}, [r1], r2
pld [r1]
vadd.u16 q10, q8, q9
vext.8 d4, d0, d1, #1
NRND vadd.u16 q10, q10, q11
vaddl.u8 q8, d0, d4
shrn d5, q10, #2
vld1.8 {q1}, [r1], r2
vadd.u16 q10, q8, q9
pld [r1, r2]
.if \avg
vld1.8 {d7}, [r0,:64]
vrhadd.u8 d5, d5, d7
.endif
NRND vadd.u16 q10, q10, q11
vst1.8 {d5}, [r0,:64], r2
shrn d7, q10, #2
.if \avg
vld1.8 {d5}, [r0,:64]
vrhadd.u8 d7, d7, d5
.endif
vext.8 d6, d2, d3, #1
vaddl.u8 q9, d2, d6
vst1.8 {d7}, [r0,:64], r2
bgt 1b
vld1.8 {q0}, [r1], r2
vadd.u16 q10, q8, q9
vext.8 d4, d0, d1, #1
NRND vadd.u16 q10, q10, q11
vaddl.u8 q8, d0, d4
shrn d5, q10, #2
vadd.u16 q10, q8, q9
.if \avg
vld1.8 {d7}, [r0,:64]
vrhadd.u8 d5, d5, d7
.endif
NRND vadd.u16 q10, q10, q11
vst1.8 {d5}, [r0,:64], r2
shrn d7, q10, #2
.if \avg
vld1.8 {d5}, [r0,:64]
vrhadd.u8 d7, d7, d5
.endif
vst1.8 {d7}, [r0,:64], r2
bx lr
.endm
.macro pixfunc pfx, name, suf, rnd=1, avg=0
.if \rnd
.macro avg rd, rn, rm
vrhadd.u8 \rd, \rn, \rm
.endm
.macro shrn rd, rn, rm
vrshrn.u16 \rd, \rn, \rm
.endm
.macro NRND insn:vararg
.endm
.else
.macro avg rd, rn, rm
vhadd.u8 \rd, \rn, \rm
.endm
.macro shrn rd, rn, rm
vshrn.u16 \rd, \rn, \rm
.endm
.macro NRND insn:vararg
\insn
.endm
.endif
function ff_\pfx\name\suf\()_neon, export=1
\name \rnd, \avg
endfunc
.purgem avg
.purgem shrn
.purgem NRND
.endm
.macro pixfunc2 pfx, name, avg=0
pixfunc \pfx, \name, rnd=1, avg=\avg
pixfunc \pfx, \name, _no_rnd, rnd=0, avg=\avg
.endm
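pixfunc stamps out each kernel twice via the avg/shrn/NRND helper macros: with rnd=1 they resolve to the rounding NEON ops (vrhadd.u8, vrshrn.u16) and NRND-prefixed lines vanish; with rnd=0 they resolve to the truncating ops (vhadd.u8, vshrn.u16) and the NRND bias additions remain. Scalar reference for the two halving adds, per lane:

    #include <stdint.h>

    /* vrhadd.u8 rounds the halving add; vhadd.u8 truncates it. */
    static inline uint8_t vrhadd_u8_ref(uint8_t a, uint8_t b) { return (a + b + 1) >> 1; }
    static inline uint8_t vhadd_u8_ref(uint8_t a, uint8_t b)  { return (a + b) >> 1; }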
function ff_put_h264_qpel16_mc00_neon, export=1
mov r3, #16
endfunc
pixfunc put_, pixels16, avg=0
pixfunc2 put_, pixels16_x2, avg=0
pixfunc2 put_, pixels16_y2, avg=0
pixfunc2 put_, pixels16_xy2, avg=0
function ff_avg_h264_qpel16_mc00_neon, export=1
mov r3, #16
endfunc
pixfunc avg_, pixels16, avg=1
pixfunc2 avg_, pixels16_x2, avg=1
pixfunc2 avg_, pixels16_y2, avg=1
pixfunc2 avg_, pixels16_xy2, avg=1
function ff_put_h264_qpel8_mc00_neon, export=1
mov r3, #8
endfunc
pixfunc put_, pixels8, avg=0
pixfunc2 put_, pixels8_x2, avg=0
pixfunc2 put_, pixels8_y2, avg=0
pixfunc2 put_, pixels8_xy2, avg=0
function ff_avg_h264_qpel8_mc00_neon, export=1
mov r3, #8
endfunc
pixfunc avg_, pixels8, avg=1
pixfunc avg_, pixels8_x2, avg=1
pixfunc avg_, pixels8_y2, avg=1
pixfunc avg_, pixels8_xy2, avg=1
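The qpel mc00 entry points above contain only mov r3, #16 (or #8) and no return: each falls through into the pixels16/pixels8 function that pixfunc emits immediately after it, fixing the height argument. A hedged C equivalent of one of them (the reference function name is illustrative):

    #include <stdint.h>
    #include <stddef.h>

    void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);

    /* mc00 is a plain 16x16 copy: the fall-through supplies h = 16. */
    static void put_h264_qpel16_mc00_ref(uint8_t *dst, const uint8_t *src,
                                         ptrdiff_t stride)
    {
        ff_put_pixels16_neon(dst, src, stride, 16);
    }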
function ff_put_pixels_clamped_neon, export=1
vld1.16 {d16-d19}, [r0,:128]!
vqmovun.s16 d0, q8
@@ -0,0 +1,611 @@
@
@ ARMv4 optimized DSP utils
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of FFmpeg.
@
@ FFmpeg is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
@ FFmpeg is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
@ License along with FFmpeg; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@
#include "config.h"
#include "libavutil/arm/asm.S"
#if !HAVE_ARMV5TE_EXTERNAL
| #define pld @ | |||
| #endif | |||
| .macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 | |||
| mov \Rd0, \Rn0, lsr #(\shift * 8) | |||
| mov \Rd1, \Rn1, lsr #(\shift * 8) | |||
| mov \Rd2, \Rn2, lsr #(\shift * 8) | |||
| mov \Rd3, \Rn3, lsr #(\shift * 8) | |||
| orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8) | |||
| orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8) | |||
| orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8) | |||
| orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8) | |||
| .endm | |||
| .macro ALIGN_DWORD shift, R0, R1, R2 | |||
| mov \R0, \R0, lsr #(\shift * 8) | |||
| orr \R0, \R0, \R1, lsl #(32 - \shift * 8) | |||
| mov \R1, \R1, lsr #(\shift * 8) | |||
| orr \R1, \R1, \R2, lsl #(32 - \shift * 8) | |||
| .endm | |||
| .macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2 | |||
| mov \Rdst0, \Rsrc0, lsr #(\shift * 8) | |||
| mov \Rdst1, \Rsrc1, lsr #(\shift * 8) | |||
| orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8)) | |||
| orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8)) | |||
| .endm | |||
| .macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask | |||
| @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1) | |||
| @ Rmask = 0xFEFEFEFE | |||
| @ Rn = destroy | |||
| eor \Rd0, \Rn0, \Rm0 | |||
| eor \Rd1, \Rn1, \Rm1 | |||
| orr \Rn0, \Rn0, \Rm0 | |||
| orr \Rn1, \Rn1, \Rm1 | |||
| and \Rd0, \Rd0, \Rmask | |||
| and \Rd1, \Rd1, \Rmask | |||
| sub \Rd0, \Rn0, \Rd0, lsr #1 | |||
| sub \Rd1, \Rn1, \Rd1, lsr #1 | |||
| .endm | |||
| .macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask | |||
| @ Rd = (Rn & Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1) | |||
| @ Rmask = 0xFEFEFEFE | |||
| @ Rn = destroy | |||
| eor \Rd0, \Rn0, \Rm0 | |||
| eor \Rd1, \Rn1, \Rm1 | |||
| and \Rn0, \Rn0, \Rm0 | |||
| and \Rn1, \Rn1, \Rm1 | |||
| and \Rd0, \Rd0, \Rmask | |||
| and \Rd1, \Rd1, \Rmask | |||
| add \Rd0, \Rn0, \Rd0, lsr #1 | |||
| add \Rd1, \Rn1, \Rd1, lsr #1 | |||
| .endm | |||
| .macro JMP_ALIGN tmp, reg | |||
| ands \tmp, \reg, #3 | |||
| bic \reg, \reg, #3 | |||
| beq 1f | |||
| subs \tmp, \tmp, #1 | |||
| beq 2f | |||
| subs \tmp, \tmp, #1 | |||
| beq 3f | |||
| b 4f | |||
| .endm | |||
| @ ---------------------------------------------------------------- | |||
| .align 5 | |||
| function ff_put_pixels16_arm, export=1 | |||
| @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| @ block = word aligned, pixles = unaligned | |||
| pld [r1] | |||
| push {r4-r11, lr} | |||
| JMP_ALIGN r5, r1 | |||
| 1: | |||
| ldm r1, {r4-r7} | |||
| add r1, r1, r2 | |||
| stm r0, {r4-r7} | |||
| pld [r1] | |||
| subs r3, r3, #1 | |||
| add r0, r0, r2 | |||
| bne 1b | |||
| pop {r4-r11, pc} | |||
| .align 5 | |||
| 2: | |||
| ldm r1, {r4-r8} | |||
| add r1, r1, r2 | |||
| ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 | |||
| pld [r1] | |||
| subs r3, r3, #1 | |||
| stm r0, {r9-r12} | |||
| add r0, r0, r2 | |||
| bne 2b | |||
| pop {r4-r11, pc} | |||
| .align 5 | |||
| 3: | |||
| ldm r1, {r4-r8} | |||
| add r1, r1, r2 | |||
| ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 | |||
| pld [r1] | |||
| subs r3, r3, #1 | |||
| stm r0, {r9-r12} | |||
| add r0, r0, r2 | |||
| bne 3b | |||
| pop {r4-r11, pc} | |||
| .align 5 | |||
| 4: | |||
| ldm r1, {r4-r8} | |||
| add r1, r1, r2 | |||
| ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 | |||
| pld [r1] | |||
| subs r3, r3, #1 | |||
| stm r0, {r9-r12} | |||
| add r0, r0, r2 | |||
| bne 4b | |||
| pop {r4-r11,pc} | |||
| endfunc | |||
| @ ---------------------------------------------------------------- | |||
| .align 5 | |||
| function ff_put_pixels8_arm, export=1 | |||
| @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| @ block = word aligned, pixles = unaligned | |||
| pld [r1] | |||
| push {r4-r5,lr} | |||
| JMP_ALIGN r5, r1 | |||
| 1: | |||
| ldm r1, {r4-r5} | |||
| add r1, r1, r2 | |||
| subs r3, r3, #1 | |||
| pld [r1] | |||
| stm r0, {r4-r5} | |||
| add r0, r0, r2 | |||
| bne 1b | |||
| pop {r4-r5,pc} | |||
| .align 5 | |||
| 2: | |||
| ldm r1, {r4-r5, r12} | |||
| add r1, r1, r2 | |||
| ALIGN_DWORD 1, r4, r5, r12 | |||
| pld [r1] | |||
| subs r3, r3, #1 | |||
| stm r0, {r4-r5} | |||
| add r0, r0, r2 | |||
| bne 2b | |||
| pop {r4-r5,pc} | |||
| .align 5 | |||
| 3: | |||
| ldm r1, {r4-r5, r12} | |||
| add r1, r1, r2 | |||
| ALIGN_DWORD 2, r4, r5, r12 | |||
| pld [r1] | |||
| subs r3, r3, #1 | |||
| stm r0, {r4-r5} | |||
| add r0, r0, r2 | |||
| bne 3b | |||
| pop {r4-r5,pc} | |||
| .align 5 | |||
| 4: | |||
| ldm r1, {r4-r5, r12} | |||
| add r1, r1, r2 | |||
| ALIGN_DWORD 3, r4, r5, r12 | |||
| pld [r1] | |||
| subs r3, r3, #1 | |||
| stm r0, {r4-r5} | |||
| add r0, r0, r2 | |||
| bne 4b | |||
| pop {r4-r5,pc} | |||
| endfunc | |||
| @ ---------------------------------------------------------------- | |||
| .align 5 | |||
| function ff_put_pixels8_x2_arm, export=1 | |||
| @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| @ block = word aligned, pixles = unaligned | |||
| pld [r1] | |||
| push {r4-r10,lr} | |||
| ldr r12, =0xfefefefe | |||
| JMP_ALIGN r5, r1 | |||
| 1: | |||
| ldm r1, {r4-r5, r10} | |||
| add r1, r1, r2 | |||
| ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 | |||
| pld [r1] | |||
| RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |||
| subs r3, r3, #1 | |||
| stm r0, {r8-r9} | |||
| add r0, r0, r2 | |||
| bne 1b | |||
| pop {r4-r10,pc} | |||
| .align 5 | |||
| 2: | |||
| ldm r1, {r4-r5, r10} | |||
| add r1, r1, r2 | |||
| ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 | |||
| ALIGN_DWORD_D 2, r8, r9, r4, r5, r10 | |||
| pld [r1] | |||
| RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |||
| subs r3, r3, #1 | |||
| stm r0, {r4-r5} | |||
| add r0, r0, r2 | |||
| bne 2b | |||
| pop {r4-r10,pc} | |||
| .align 5 | |||
| 3: | |||
| ldm r1, {r4-r5, r10} | |||
| add r1, r1, r2 | |||
| ALIGN_DWORD_D 2, r6, r7, r4, r5, r10 | |||
| ALIGN_DWORD_D 3, r8, r9, r4, r5, r10 | |||
| pld [r1] | |||
| RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |||
| subs r3, r3, #1 | |||
| stm r0, {r4-r5} | |||
| add r0, r0, r2 | |||
| bne 3b | |||
| pop {r4-r10,pc} | |||
| .align 5 | |||
| 4: | |||
| ldm r1, {r4-r5, r10} | |||
| add r1, r1, r2 | |||
| ALIGN_DWORD_D 3, r6, r7, r4, r5, r10 | |||
| pld [r1] | |||
| RND_AVG32 r8, r9, r6, r7, r5, r10, r12 | |||
| subs r3, r3, #1 | |||
| stm r0, {r8-r9} | |||
| add r0, r0, r2 | |||
| bne 4b | |||
| pop {r4-r10,pc} | |||
| endfunc | |||
| .align 5 | |||
| function ff_put_no_rnd_pixels8_x2_arm, export=1 | |||
| @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| @ block = word aligned, pixles = unaligned | |||
| pld [r1] | |||
| push {r4-r10,lr} | |||
| ldr r12, =0xfefefefe | |||
| JMP_ALIGN r5, r1 | |||
| 1: | |||
| ldm r1, {r4-r5, r10} | |||
| add r1, r1, r2 | |||
| ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 | |||
| pld [r1] | |||
| NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |||
| subs r3, r3, #1 | |||
| stm r0, {r8-r9} | |||
| add r0, r0, r2 | |||
| bne 1b | |||
| pop {r4-r10,pc} | |||
| .align 5 | |||
| 2: | |||
| ldm r1, {r4-r5, r10} | |||
| add r1, r1, r2 | |||
| ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 | |||
| ALIGN_DWORD_D 2, r8, r9, r4, r5, r10 | |||
| pld [r1] | |||
| NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |||
| subs r3, r3, #1 | |||
| stm r0, {r4-r5} | |||
| add r0, r0, r2 | |||
| bne 2b | |||
| pop {r4-r10,pc} | |||
| .align 5 | |||
| 3: | |||
| ldm r1, {r4-r5, r10} | |||
| add r1, r1, r2 | |||
| ALIGN_DWORD_D 2, r6, r7, r4, r5, r10 | |||
| ALIGN_DWORD_D 3, r8, r9, r4, r5, r10 | |||
| pld [r1] | |||
| NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |||
| subs r3, r3, #1 | |||
| stm r0, {r4-r5} | |||
| add r0, r0, r2 | |||
| bne 3b | |||
| pop {r4-r10,pc} | |||
| .align 5 | |||
| 4: | |||
| ldm r1, {r4-r5, r10} | |||
| add r1, r1, r2 | |||
| ALIGN_DWORD_D 3, r6, r7, r4, r5, r10 | |||
| pld [r1] | |||
| NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 | |||
| subs r3, r3, #1 | |||
| stm r0, {r8-r9} | |||
| add r0, r0, r2 | |||
| bne 4b | |||
| pop {r4-r10,pc} | |||
| endfunc | |||
| @ ---------------------------------------------------------------- | |||
| .align 5 | |||
| function ff_put_pixels8_y2_arm, export=1 | |||
| @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| @ block = word aligned, pixles = unaligned | |||
| pld [r1] | |||
| push {r4-r11,lr} | |||
| mov r3, r3, lsr #1 | |||
| ldr r12, =0xfefefefe | |||
| JMP_ALIGN r5, r1 | |||
| 1: | |||
| ldm r1, {r4-r5} | |||
| add r1, r1, r2 | |||
| 6: ldm r1, {r6-r7} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |||
| ldm r1, {r4-r5} | |||
| add r1, r1, r2 | |||
| stm r0, {r8-r9} | |||
| add r0, r0, r2 | |||
| pld [r1] | |||
| RND_AVG32 r8, r9, r6, r7, r4, r5, r12 | |||
| subs r3, r3, #1 | |||
| stm r0, {r8-r9} | |||
| add r0, r0, r2 | |||
| bne 6b | |||
| pop {r4-r11,pc} | |||
| .align 5 | |||
| 2: | |||
| ldm r1, {r4-r6} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 1, r4, r5, r6 | |||
| 6: ldm r1, {r7-r9} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 1, r7, r8, r9 | |||
| RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |||
| stm r0, {r10-r11} | |||
| add r0, r0, r2 | |||
| ldm r1, {r4-r6} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 1, r4, r5, r6 | |||
| subs r3, r3, #1 | |||
| RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |||
| stm r0, {r10-r11} | |||
| add r0, r0, r2 | |||
| bne 6b | |||
| pop {r4-r11,pc} | |||
| .align 5 | |||
| 3: | |||
| ldm r1, {r4-r6} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 2, r4, r5, r6 | |||
| 6: ldm r1, {r7-r9} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 2, r7, r8, r9 | |||
| RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |||
| stm r0, {r10-r11} | |||
| add r0, r0, r2 | |||
| ldm r1, {r4-r6} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 2, r4, r5, r6 | |||
| subs r3, r3, #1 | |||
| RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |||
| stm r0, {r10-r11} | |||
| add r0, r0, r2 | |||
| bne 6b | |||
| pop {r4-r11,pc} | |||
| .align 5 | |||
| 4: | |||
| ldm r1, {r4-r6} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 3, r4, r5, r6 | |||
| 6: ldm r1, {r7-r9} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 3, r7, r8, r9 | |||
| RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |||
| stm r0, {r10-r11} | |||
| add r0, r0, r2 | |||
| ldm r1, {r4-r6} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 3, r4, r5, r6 | |||
| subs r3, r3, #1 | |||
| RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |||
| stm r0, {r10-r11} | |||
| add r0, r0, r2 | |||
| bne 6b | |||
| pop {r4-r11,pc} | |||
| endfunc | |||
| .align 5 | |||
| function ff_put_no_rnd_pixels8_y2_arm, export=1 | |||
| @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| @ block = word aligned, pixles = unaligned | |||
| pld [r1] | |||
| push {r4-r11,lr} | |||
| mov r3, r3, lsr #1 | |||
| ldr r12, =0xfefefefe | |||
| JMP_ALIGN r5, r1 | |||
| 1: | |||
| ldm r1, {r4-r5} | |||
| add r1, r1, r2 | |||
| 6: ldm r1, {r6-r7} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 | |||
| ldm r1, {r4-r5} | |||
| add r1, r1, r2 | |||
| stm r0, {r8-r9} | |||
| add r0, r0, r2 | |||
| pld [r1] | |||
| NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 | |||
| subs r3, r3, #1 | |||
| stm r0, {r8-r9} | |||
| add r0, r0, r2 | |||
| bne 6b | |||
| pop {r4-r11,pc} | |||
| .align 5 | |||
| 2: | |||
| ldm r1, {r4-r6} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 1, r4, r5, r6 | |||
| 6: ldm r1, {r7-r9} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 1, r7, r8, r9 | |||
| NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |||
| stm r0, {r10-r11} | |||
| add r0, r0, r2 | |||
| ldm r1, {r4-r6} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 1, r4, r5, r6 | |||
| subs r3, r3, #1 | |||
| NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |||
| stm r0, {r10-r11} | |||
| add r0, r0, r2 | |||
| bne 6b | |||
| pop {r4-r11,pc} | |||
| .align 5 | |||
| 3: | |||
| ldm r1, {r4-r6} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 2, r4, r5, r6 | |||
| 6: ldm r1, {r7-r9} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 2, r7, r8, r9 | |||
| NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |||
| stm r0, {r10-r11} | |||
| add r0, r0, r2 | |||
| ldm r1, {r4-r6} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 2, r4, r5, r6 | |||
| subs r3, r3, #1 | |||
| NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |||
| stm r0, {r10-r11} | |||
| add r0, r0, r2 | |||
| bne 6b | |||
| pop {r4-r11,pc} | |||
| .align 5 | |||
| 4: | |||
| ldm r1, {r4-r6} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 3, r4, r5, r6 | |||
| 6: ldm r1, {r7-r9} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 3, r7, r8, r9 | |||
| NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |||
| stm r0, {r10-r11} | |||
| add r0, r0, r2 | |||
| ldm r1, {r4-r6} | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| ALIGN_DWORD 3, r4, r5, r6 | |||
| subs r3, r3, #1 | |||
| NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |||
| stm r0, {r10-r11} | |||
| add r0, r0, r2 | |||
| bne 6b | |||
| pop {r4-r11,pc} | |||
| endfunc | |||
| .ltorg | |||
| @ ---------------------------------------------------------------- | |||
| .macro RND_XY2_IT align, rnd | |||
| @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202) | |||
| @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2) | |||
| .if \align == 0 | |||
| ldm r1, {r6-r8} | |||
| .elseif \align == 3 | |||
| ldm r1, {r5-r7} | |||
| .else | |||
| ldm r1, {r8-r10} | |||
| .endif | |||
| add r1, r1, r2 | |||
| pld [r1] | |||
| .if \align == 0 | |||
| ALIGN_DWORD_D 1, r4, r5, r6, r7, r8 | |||
| .elseif \align == 1 | |||
| ALIGN_DWORD_D 1, r4, r5, r8, r9, r10 | |||
| ALIGN_DWORD_D 2, r6, r7, r8, r9, r10 | |||
| .elseif \align == 2 | |||
| ALIGN_DWORD_D 2, r4, r5, r8, r9, r10 | |||
| ALIGN_DWORD_D 3, r6, r7, r8, r9, r10 | |||
| .elseif \align == 3 | |||
| ALIGN_DWORD_D 3, r4, r5, r5, r6, r7 | |||
| .endif | |||
| ldr r14, =0x03030303 | |||
| tst r3, #1 | |||
| and r8, r4, r14 | |||
| and r9, r5, r14 | |||
| and r10, r6, r14 | |||
| and r11, r7, r14 | |||
| it eq | |||
| andeq r14, r14, r14, \rnd #1 | |||
| add r8, r8, r10 | |||
| add r9, r9, r11 | |||
| ldr r12, =0xfcfcfcfc >> 2 | |||
| itt eq | |||
| addeq r8, r8, r14 | |||
| addeq r9, r9, r14 | |||
| and r4, r12, r4, lsr #2 | |||
| and r5, r12, r5, lsr #2 | |||
| and r6, r12, r6, lsr #2 | |||
| and r7, r12, r7, lsr #2 | |||
| add r10, r4, r6 | |||
| add r11, r5, r7 | |||
| subs r3, r3, #1 | |||
| .endm | |||
| .macro RND_XY2_EXPAND align, rnd | |||
| RND_XY2_IT \align, \rnd | |||
| 6: push {r8-r11} | |||
| RND_XY2_IT \align, \rnd | |||
| pop {r4-r7} | |||
| add r4, r4, r8 | |||
| add r5, r5, r9 | |||
| ldr r14, =0x0f0f0f0f | |||
| add r6, r6, r10 | |||
| add r7, r7, r11 | |||
| and r4, r14, r4, lsr #2 | |||
| and r5, r14, r5, lsr #2 | |||
| add r4, r4, r6 | |||
| add r5, r5, r7 | |||
| stm r0, {r4-r5} | |||
| add r0, r0, r2 | |||
| bge 6b | |||
| pop {r4-r11,pc} | |||
| .endm | |||
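The xy2 kernels average four neighbours per output pixel. The comment at the top of RND_XY2_IT describes the word-at-a-time split used to do this four pixels at a time: the low two bits and the high six bits of each byte are summed separately, so no partial sum ever carries across a byte lane. A scalar sketch of that idea, with the rounded bias 0x02020202 (the no_rnd path halves it to 0x01010101):

    #include <stdint.h>

    /* Four-way SWAR average: each 32-bit word holds four pixels.  The
     * low-2-bit and high-6-bit sums each stay within their byte lane;
     * the result per byte is (a + b + c + d + bias) >> 2. */
    static uint32_t avg4_swar(uint32_t a, uint32_t b, uint32_t c,
                              uint32_t d, uint32_t bias)
    {
        uint32_t lo = (a & 0x03030303u) + (b & 0x03030303u)
                    + (c & 0x03030303u) + (d & 0x03030303u) + bias;
        uint32_t hi = ((a >> 2) & 0x3f3f3f3fu) + ((b >> 2) & 0x3f3f3f3fu)
                    + ((c >> 2) & 0x3f3f3f3fu) + ((d >> 2) & 0x3f3f3f3fu);
        return hi + ((lo >> 2) & 0x0f0f0f0fu);
    }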
| .align 5 | |||
| function ff_put_pixels8_xy2_arm, export=1 | |||
| @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||
| @ block = word aligned, pixels = unaligned | |||
| pld [r1] | |||
| push {r4-r11,lr} @ R14 is also called LR | |||
| JMP_ALIGN r5, r1 | |||
| 1: RND_XY2_EXPAND 0, lsl | |||
| .align 5 | |||
| 2: RND_XY2_EXPAND 1, lsl | |||
| .align 5 | |||
| 3: RND_XY2_EXPAND 2, lsl | |||
| .align 5 | |||
| 4: RND_XY2_EXPAND 3, lsl | |||
| endfunc | |||
| .align 5 | |||
| function ff_put_no_rnd_pixels8_xy2_arm, export=1 | |||
| @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||
| @ block = word aligned, pixels = unaligned | |||
| pld [r1] | |||
| push {r4-r11,lr} | |||
| JMP_ALIGN r5, r1 | |||
| 1: RND_XY2_EXPAND 0, lsr | |||
| .align 5 | |||
| 2: RND_XY2_EXPAND 1, lsr | |||
| .align 5 | |||
| 3: RND_XY2_EXPAND 2, lsr | |||
| .align 5 | |||
| 4: RND_XY2_EXPAND 3, lsr | |||
| endfunc | |||
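All of these classic-ARM kernels dispatch through JMP_ALIGN: the low two bits of the source pointer select one of four unrolled loops (the numeric labels 1: to 4:), each of which performs only word-aligned loads and shifts the bytes back into place. A C sketch of the underlying unaligned-read idiom (little-endian, ignores strict aliasing):

    #include <stdint.h>

    /* Read 32 unaligned bits via two aligned word loads plus a funnel
     * shift, as the per-alignment loop bodies do. */
    static uint32_t load32_unaligned(const uint8_t *p)
    {
        const uint32_t *w = (const uint32_t *)((uintptr_t)p & ~(uintptr_t)3);
        unsigned shift = ((uintptr_t)p & 3) * 8;      /* 0, 8, 16 or 24 */
        return shift ? (w[0] >> shift) | (w[1] << (32 - shift)) : w[0];
    }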
| @@ -0,0 +1,29 @@ | |||
| /* | |||
| * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVCODEC_ARM_HPELDSP_H | |||
| #define AVCODEC_ARM_HPELDSP_H | |||
| #include "libavcodec/hpeldsp.h" | |||
| void ff_hpeldsp_init_armv6(HpelDSPContext *c, int flags); | |||
| void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags); | |||
| #endif /* AVCODEC_ARM_HPELDSP_H */ | |||
| @@ -0,0 +1,259 @@ | |||
| /* | |||
| * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavutil/arm/asm.S" | |||
| .macro call_2x_pixels type, subp | |||
| function ff_\type\()_pixels16\subp\()_armv6, export=1 | |||
| push {r0-r3, lr} | |||
| bl ff_\type\()_pixels8\subp\()_armv6 | |||
| pop {r0-r3, lr} | |||
| add r0, r0, #8 | |||
| add r1, r1, #8 | |||
| b ff_\type\()_pixels8\subp\()_armv6 | |||
| endfunc | |||
| .endm | |||
| call_2x_pixels avg | |||
| call_2x_pixels put, _x2 | |||
| call_2x_pixels put, _y2 | |||
| call_2x_pixels put, _x2_no_rnd | |||
| call_2x_pixels put, _y2_no_rnd | |||
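call_2x_pixels builds the 16-pixel-wide entry points out of the 8-wide ones: run the 8-wide routine on the left half, then tail-call it with both pointers advanced by 8 for the right half. It is the assembly counterpart of the CALL_2X_PIXELS C macro used in the init files below. A hypothetical C rendering of what it generates for "avg":

    #include <stddef.h>
    #include <stdint.h>

    void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);

    /* Sketch (name illustrative): the 16-wide op is the 8-wide op
     * applied to the left and right half-blocks. */
    static void avg_pixels16_from8(uint8_t *block, const uint8_t *pixels,
                                   ptrdiff_t line_size, int h)
    {
        ff_avg_pixels8_armv6(block,     pixels,     line_size, h);
        ff_avg_pixels8_armv6(block + 8, pixels + 8, line_size, h);
    }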
| function ff_put_pixels16_armv6, export=1 | |||
| push {r4-r11} | |||
| 1: | |||
| ldr r5, [r1, #4] | |||
| ldr r6, [r1, #8] | |||
| ldr r7, [r1, #12] | |||
| ldr_post r4, r1, r2 | |||
| strd r6, r7, [r0, #8] | |||
| ldr r9, [r1, #4] | |||
| strd_post r4, r5, r0, r2 | |||
| ldr r10, [r1, #8] | |||
| ldr r11, [r1, #12] | |||
| ldr_post r8, r1, r2 | |||
| strd r10, r11, [r0, #8] | |||
| subs r3, r3, #2 | |||
| strd_post r8, r9, r0, r2 | |||
| bne 1b | |||
| pop {r4-r11} | |||
| bx lr | |||
| endfunc | |||
| function ff_put_pixels8_armv6, export=1 | |||
| push {r4-r7} | |||
| 1: | |||
| ldr r5, [r1, #4] | |||
| ldr_post r4, r1, r2 | |||
| ldr r7, [r1, #4] | |||
| strd_post r4, r5, r0, r2 | |||
| ldr_post r6, r1, r2 | |||
| subs r3, r3, #2 | |||
| strd_post r6, r7, r0, r2 | |||
| bne 1b | |||
| pop {r4-r7} | |||
| bx lr | |||
| endfunc | |||
| function ff_put_pixels8_x2_armv6, export=1 | |||
| push {r4-r11, lr} | |||
| mov r12, #1 | |||
| orr r12, r12, r12, lsl #8 | |||
| orr r12, r12, r12, lsl #16 | |||
| 1: | |||
| ldr r4, [r1] | |||
| subs r3, r3, #2 | |||
| ldr r5, [r1, #4] | |||
| ldr r7, [r1, #5] | |||
| lsr r6, r4, #8 | |||
| ldr_pre r8, r1, r2 | |||
| orr r6, r6, r5, lsl #24 | |||
| ldr r9, [r1, #4] | |||
| ldr r11, [r1, #5] | |||
| lsr r10, r8, #8 | |||
| add r1, r1, r2 | |||
| orr r10, r10, r9, lsl #24 | |||
| eor r14, r4, r6 | |||
| uhadd8 r4, r4, r6 | |||
| eor r6, r5, r7 | |||
| uhadd8 r5, r5, r7 | |||
| and r14, r14, r12 | |||
| and r6, r6, r12 | |||
| uadd8 r4, r4, r14 | |||
| eor r14, r8, r10 | |||
| uadd8 r5, r5, r6 | |||
| eor r6, r9, r11 | |||
| uhadd8 r8, r8, r10 | |||
| and r14, r14, r12 | |||
| uhadd8 r9, r9, r11 | |||
| and r6, r6, r12 | |||
| uadd8 r8, r8, r14 | |||
| strd_post r4, r5, r0, r2 | |||
| uadd8 r9, r9, r6 | |||
| strd_post r8, r9, r0, r2 | |||
| bne 1b | |||
| pop {r4-r11, pc} | |||
| endfunc | |||
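The ARMv6 rounded kernels lean on the SIMD extensions: uhadd8 produces the truncated per-byte average (a + b) >> 1, and adding back (a ^ b) & 0x01 per byte with uadd8 (the 0x01010101 constant built into r12 above) turns that into the rounded average, since (a + b + 1) >> 1 = ((a + b) >> 1) + ((a ^ b) & 1). The no_rnd kernels further down use uhadd8 alone. Per byte, the fix-up is:

    #include <stdint.h>

    /* Rounding fix-up behind the uhadd8/uadd8 pairing, for one byte. */
    static uint8_t rounded_avg_u8(uint8_t a, uint8_t b)
    {
        uint8_t truncated = (a + b) >> 1;  /* what uhadd8 computes per lane */
        return truncated + ((a ^ b) & 1);  /* what the uadd8 step adds back */
    }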
| function ff_put_pixels8_y2_armv6, export=1 | |||
| push {r4-r11} | |||
| mov r12, #1 | |||
| orr r12, r12, r12, lsl #8 | |||
| orr r12, r12, r12, lsl #16 | |||
| ldr r4, [r1] | |||
| ldr r5, [r1, #4] | |||
| ldr_pre r6, r1, r2 | |||
| ldr r7, [r1, #4] | |||
| 1: | |||
| subs r3, r3, #2 | |||
| uhadd8 r8, r4, r6 | |||
| eor r10, r4, r6 | |||
| uhadd8 r9, r5, r7 | |||
| eor r11, r5, r7 | |||
| and r10, r10, r12 | |||
| ldr_pre r4, r1, r2 | |||
| uadd8 r8, r8, r10 | |||
| and r11, r11, r12 | |||
| uadd8 r9, r9, r11 | |||
| ldr r5, [r1, #4] | |||
| uhadd8 r10, r4, r6 | |||
| eor r6, r4, r6 | |||
| uhadd8 r11, r5, r7 | |||
| and r6, r6, r12 | |||
| eor r7, r5, r7 | |||
| uadd8 r10, r10, r6 | |||
| and r7, r7, r12 | |||
| ldr_pre r6, r1, r2 | |||
| uadd8 r11, r11, r7 | |||
| strd_post r8, r9, r0, r2 | |||
| ldr r7, [r1, #4] | |||
| strd_post r10, r11, r0, r2 | |||
| bne 1b | |||
| pop {r4-r11} | |||
| bx lr | |||
| endfunc | |||
| function ff_put_pixels8_x2_no_rnd_armv6, export=1 | |||
| push {r4-r9, lr} | |||
| 1: | |||
| subs r3, r3, #2 | |||
| ldr r4, [r1] | |||
| ldr r5, [r1, #4] | |||
| ldr r7, [r1, #5] | |||
| ldr_pre r8, r1, r2 | |||
| ldr r9, [r1, #4] | |||
| ldr r14, [r1, #5] | |||
| add r1, r1, r2 | |||
| lsr r6, r4, #8 | |||
| orr r6, r6, r5, lsl #24 | |||
| lsr r12, r8, #8 | |||
| orr r12, r12, r9, lsl #24 | |||
| uhadd8 r4, r4, r6 | |||
| uhadd8 r5, r5, r7 | |||
| uhadd8 r8, r8, r12 | |||
| uhadd8 r9, r9, r14 | |||
| stm r0, {r4,r5} | |||
| add r0, r0, r2 | |||
| stm r0, {r8,r9} | |||
| add r0, r0, r2 | |||
| bne 1b | |||
| pop {r4-r9, pc} | |||
| endfunc | |||
| function ff_put_pixels8_y2_no_rnd_armv6, export=1 | |||
| push {r4-r9, lr} | |||
| ldr r4, [r1] | |||
| ldr r5, [r1, #4] | |||
| ldr_pre r6, r1, r2 | |||
| ldr r7, [r1, #4] | |||
| 1: | |||
| subs r3, r3, #2 | |||
| uhadd8 r8, r4, r6 | |||
| ldr_pre r4, r1, r2 | |||
| uhadd8 r9, r5, r7 | |||
| ldr r5, [r1, #4] | |||
| uhadd8 r12, r4, r6 | |||
| ldr_pre r6, r1, r2 | |||
| uhadd8 r14, r5, r7 | |||
| ldr r7, [r1, #4] | |||
| stm r0, {r8,r9} | |||
| add r0, r0, r2 | |||
| stm r0, {r12,r14} | |||
| add r0, r0, r2 | |||
| bne 1b | |||
| pop {r4-r9, pc} | |||
| endfunc | |||
| function ff_avg_pixels8_armv6, export=1 | |||
| pld [r1, r2] | |||
| push {r4-r10, lr} | |||
| mov lr, #1 | |||
| orr lr, lr, lr, lsl #8 | |||
| orr lr, lr, lr, lsl #16 | |||
| ldrd r4, r5, [r0] | |||
| ldr r10, [r1, #4] | |||
| ldr_post r9, r1, r2 | |||
| subs r3, r3, #2 | |||
| 1: | |||
| pld [r1, r2] | |||
| eor r8, r4, r9 | |||
| uhadd8 r4, r4, r9 | |||
| eor r12, r5, r10 | |||
| ldrd_reg r6, r7, r0, r2 | |||
| uhadd8 r5, r5, r10 | |||
| and r8, r8, lr | |||
| ldr r10, [r1, #4] | |||
| and r12, r12, lr | |||
| uadd8 r4, r4, r8 | |||
| ldr_post r9, r1, r2 | |||
| eor r8, r6, r9 | |||
| uadd8 r5, r5, r12 | |||
| pld [r1, r2, lsl #1] | |||
| eor r12, r7, r10 | |||
| uhadd8 r6, r6, r9 | |||
| strd_post r4, r5, r0, r2 | |||
| uhadd8 r7, r7, r10 | |||
| beq 2f | |||
| and r8, r8, lr | |||
| ldrd_reg r4, r5, r0, r2 | |||
| uadd8 r6, r6, r8 | |||
| ldr r10, [r1, #4] | |||
| and r12, r12, lr | |||
| subs r3, r3, #2 | |||
| uadd8 r7, r7, r12 | |||
| ldr_post r9, r1, r2 | |||
| strd_post r6, r7, r0, r2 | |||
| b 1b | |||
| 2: | |||
| and r8, r8, lr | |||
| and r12, r12, lr | |||
| uadd8 r6, r6, r8 | |||
| uadd8 r7, r7, r12 | |||
| strd_post r6, r7, r0, r2 | |||
| pop {r4-r10, pc} | |||
| endfunc | |||
| @@ -0,0 +1,68 @@ | |||
| /* | |||
| * ARM optimized DSP utils | |||
| * Copyright (c) 2001 Lionel Ulmer | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavutil/arm/cpu.h" | |||
| #include "libavcodec/bit_depth_template.c" // for CALL_2X_PIXELS | |||
| #include "hpeldsp_arm.h" | |||
| void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||
| void ff_put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||
| void ff_put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||
| void ff_put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||
| void ff_put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||
| void ff_put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||
| void ff_put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||
| void ff_put_pixels16_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||
| CALL_2X_PIXELS(ff_put_pixels16_x2_arm, ff_put_pixels8_x2_arm, 8) | |||
| CALL_2X_PIXELS(ff_put_pixels16_y2_arm, ff_put_pixels8_y2_arm, 8) | |||
| CALL_2X_PIXELS(ff_put_pixels16_xy2_arm, ff_put_pixels8_xy2_arm, 8) | |||
| CALL_2X_PIXELS(ff_put_no_rnd_pixels16_x2_arm, ff_put_no_rnd_pixels8_x2_arm, 8) | |||
| CALL_2X_PIXELS(ff_put_no_rnd_pixels16_y2_arm, ff_put_no_rnd_pixels8_y2_arm, 8) | |||
| CALL_2X_PIXELS(ff_put_no_rnd_pixels16_xy2_arm, ff_put_no_rnd_pixels8_xy2_arm, 8) | |||
| void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags) | |||
| { | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| c->put_pixels_tab[0][0] = ff_put_pixels16_arm; | |||
| c->put_pixels_tab[0][1] = ff_put_pixels16_x2_arm; | |||
| c->put_pixels_tab[0][2] = ff_put_pixels16_y2_arm; | |||
| c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_arm; | |||
| c->put_pixels_tab[1][0] = ff_put_pixels8_arm; | |||
| c->put_pixels_tab[1][1] = ff_put_pixels8_x2_arm; | |||
| c->put_pixels_tab[1][2] = ff_put_pixels8_y2_arm; | |||
| c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_arm; | |||
| c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_arm; | |||
| c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_arm; | |||
| c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_arm; | |||
| c->put_no_rnd_pixels_tab[0][3] = ff_put_no_rnd_pixels16_xy2_arm; | |||
| c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_arm; | |||
| c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_arm; | |||
| c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm; | |||
| c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm; | |||
| if (have_armv6(cpu_flags)) ff_hpeldsp_init_armv6(c, flags); | |||
| if (have_neon(cpu_flags)) ff_hpeldsp_init_neon(c, flags); | |||
| } | |||
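For context, a sketch of how a decoder consumes the tables installed above, assuming the usual hpeldsp indexing (first index: block width, 0 = 16 and 1 = 8; second index: half-pel case, (mx & 1) | ((my & 1) << 1)). Names here are illustrative, not FFmpeg API:

    #include <stddef.h>
    #include <stdint.h>
    #include "libavcodec/hpeldsp.h"

    /* Illustrative caller: pick the kernel from the half-pel fraction of
     * the motion vector, hand it the integer-aligned source pointer. */
    static void copy_block_hpel(HpelDSPContext *c, uint8_t *dst,
                                const uint8_t *src, ptrdiff_t stride,
                                int h, int mx, int my, int is_8x8)
    {
        int dxy = (mx & 1) | ((my & 1) << 1);
        c->put_pixels_tab[is_8x8][dxy](dst,
                                       src + (mx >> 1) + (my >> 1) * stride,
                                       stride, h);
    }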
| @@ -0,0 +1,66 @@ | |||
| /* | |||
| * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include <stdint.h> | |||
| #include "libavutil/attributes.h" | |||
| #include "hpeldsp_arm.h" | |||
| void ff_put_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels16_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels16_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels16_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels16_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_avg_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels8_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels8_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels8_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels8_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| av_cold void ff_hpeldsp_init_armv6(HpelDSPContext *c, int flags) | |||
| { | |||
| c->put_pixels_tab[0][0] = ff_put_pixels16_armv6; | |||
| c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6; | |||
| c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6; | |||
| /* c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_armv6; */ | |||
| c->put_pixels_tab[1][0] = ff_put_pixels8_armv6; | |||
| c->put_pixels_tab[1][1] = ff_put_pixels8_x2_armv6; | |||
| c->put_pixels_tab[1][2] = ff_put_pixels8_y2_armv6; | |||
| /* c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_armv6; */ | |||
| c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_armv6; | |||
| c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_armv6; | |||
| c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_armv6; | |||
| /* c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_armv6; */ | |||
| c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_armv6; | |||
| c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_armv6; | |||
| c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_armv6; | |||
| /* c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_armv6; */ | |||
| c->avg_pixels_tab[0][0] = ff_avg_pixels16_armv6; | |||
| c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6; | |||
| } | |||
| @@ -0,0 +1,86 @@ | |||
| /* | |||
| * ARM NEON optimised DSP functions | |||
| * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include <stdint.h> | |||
| #include "hpeldsp_arm.h" | |||
| void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_avg_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_avg_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_avg_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_avg_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_avg_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_avg_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||
| void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags) | |||
| { | |||
| c->put_pixels_tab[0][0] = ff_put_pixels16_neon; | |||
| c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon; | |||
| c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon; | |||
| c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon; | |||
| c->put_pixels_tab[1][0] = ff_put_pixels8_neon; | |||
| c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon; | |||
| c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon; | |||
| c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon; | |||
| c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon; | |||
| c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon; | |||
| c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon; | |||
| c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon; | |||
| c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon; | |||
| c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon; | |||
| c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon; | |||
| c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon; | |||
| c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon; | |||
| c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_neon; | |||
| c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_neon; | |||
| c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_neon; | |||
| c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon; | |||
| c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_neon; | |||
| c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_neon; | |||
| c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_neon; | |||
| c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_neon; | |||
| c->avg_no_rnd_pixels_tab[1] = ff_avg_pixels16_x2_no_rnd_neon; | |||
| c->avg_no_rnd_pixels_tab[2] = ff_avg_pixels16_y2_no_rnd_neon; | |||
| c->avg_no_rnd_pixels_tab[3] = ff_avg_pixels16_xy2_no_rnd_neon; | |||
| } | |||
| @@ -0,0 +1,410 @@ | |||
| /* | |||
| * ARM NEON optimised DSP functions | |||
| * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavutil/arm/asm.S" | |||
| .macro pixels16 rnd=1, avg=0 | |||
| .if \avg | |||
| mov r12, r0 | |||
| .endif | |||
| 1: vld1.8 {q0}, [r1], r2 | |||
| vld1.8 {q1}, [r1], r2 | |||
| vld1.8 {q2}, [r1], r2 | |||
| pld [r1, r2, lsl #2] | |||
| vld1.8 {q3}, [r1], r2 | |||
| pld [r1] | |||
| pld [r1, r2] | |||
| pld [r1, r2, lsl #1] | |||
| .if \avg | |||
| vld1.8 {q8}, [r12,:128], r2 | |||
| vrhadd.u8 q0, q0, q8 | |||
| vld1.8 {q9}, [r12,:128], r2 | |||
| vrhadd.u8 q1, q1, q9 | |||
| vld1.8 {q10}, [r12,:128], r2 | |||
| vrhadd.u8 q2, q2, q10 | |||
| vld1.8 {q11}, [r12,:128], r2 | |||
| vrhadd.u8 q3, q3, q11 | |||
| .endif | |||
| subs r3, r3, #4 | |||
| vst1.64 {q0}, [r0,:128], r2 | |||
| vst1.64 {q1}, [r0,:128], r2 | |||
| vst1.64 {q2}, [r0,:128], r2 | |||
| vst1.64 {q3}, [r0,:128], r2 | |||
| bne 1b | |||
| bx lr | |||
| .endm | |||
| .macro pixels16_x2 rnd=1, avg=0 | |||
| 1: vld1.8 {d0-d2}, [r1], r2 | |||
| vld1.8 {d4-d6}, [r1], r2 | |||
| pld [r1] | |||
| pld [r1, r2] | |||
| subs r3, r3, #2 | |||
| vext.8 q1, q0, q1, #1 | |||
| avg q0, q0, q1 | |||
| vext.8 q3, q2, q3, #1 | |||
| avg q2, q2, q3 | |||
| .if \avg | |||
| vld1.8 {q1}, [r0,:128], r2 | |||
| vld1.8 {q3}, [r0,:128] | |||
| vrhadd.u8 q0, q0, q1 | |||
| vrhadd.u8 q2, q2, q3 | |||
| sub r0, r0, r2 | |||
| .endif | |||
| vst1.8 {q0}, [r0,:128], r2 | |||
| vst1.8 {q2}, [r0,:128], r2 | |||
| bne 1b | |||
| bx lr | |||
| .endm | |||
| .macro pixels16_y2 rnd=1, avg=0 | |||
| sub r3, r3, #2 | |||
| vld1.8 {q0}, [r1], r2 | |||
| vld1.8 {q1}, [r1], r2 | |||
| 1: subs r3, r3, #2 | |||
| avg q2, q0, q1 | |||
| vld1.8 {q0}, [r1], r2 | |||
| avg q3, q0, q1 | |||
| vld1.8 {q1}, [r1], r2 | |||
| pld [r1] | |||
| pld [r1, r2] | |||
| .if \avg | |||
| vld1.8 {q8}, [r0,:128], r2 | |||
| vld1.8 {q9}, [r0,:128] | |||
| vrhadd.u8 q2, q2, q8 | |||
| vrhadd.u8 q3, q3, q9 | |||
| sub r0, r0, r2 | |||
| .endif | |||
| vst1.8 {q2}, [r0,:128], r2 | |||
| vst1.8 {q3}, [r0,:128], r2 | |||
| bne 1b | |||
| avg q2, q0, q1 | |||
| vld1.8 {q0}, [r1], r2 | |||
| avg q3, q0, q1 | |||
| .if \avg | |||
| vld1.8 {q8}, [r0,:128], r2 | |||
| vld1.8 {q9}, [r0,:128] | |||
| vrhadd.u8 q2, q2, q8 | |||
| vrhadd.u8 q3, q3, q9 | |||
| sub r0, r0, r2 | |||
| .endif | |||
| vst1.8 {q2}, [r0,:128], r2 | |||
| vst1.8 {q3}, [r0,:128], r2 | |||
| bx lr | |||
| .endm | |||
| .macro pixels16_xy2 rnd=1, avg=0 | |||
| sub r3, r3, #2 | |||
| vld1.8 {d0-d2}, [r1], r2 | |||
| vld1.8 {d4-d6}, [r1], r2 | |||
| NRND vmov.i16 q13, #1 | |||
| pld [r1] | |||
| pld [r1, r2] | |||
| vext.8 q1, q0, q1, #1 | |||
| vext.8 q3, q2, q3, #1 | |||
| vaddl.u8 q8, d0, d2 | |||
| vaddl.u8 q10, d1, d3 | |||
| vaddl.u8 q9, d4, d6 | |||
| vaddl.u8 q11, d5, d7 | |||
| 1: subs r3, r3, #2 | |||
| vld1.8 {d0-d2}, [r1], r2 | |||
| vadd.u16 q12, q8, q9 | |||
| pld [r1] | |||
| NRND vadd.u16 q12, q12, q13 | |||
| vext.8 q15, q0, q1, #1 | |||
| vadd.u16 q1 , q10, q11 | |||
| shrn d28, q12, #2 | |||
| NRND vadd.u16 q1, q1, q13 | |||
| shrn d29, q1, #2 | |||
| .if \avg | |||
| vld1.8 {q8}, [r0,:128] | |||
| vrhadd.u8 q14, q14, q8 | |||
| .endif | |||
| vaddl.u8 q8, d0, d30 | |||
| vld1.8 {d2-d4}, [r1], r2 | |||
| vaddl.u8 q10, d1, d31 | |||
| vst1.8 {q14}, [r0,:128], r2 | |||
| vadd.u16 q12, q8, q9 | |||
| pld [r1, r2] | |||
| NRND vadd.u16 q12, q12, q13 | |||
| vext.8 q2, q1, q2, #1 | |||
| vadd.u16 q0, q10, q11 | |||
| shrn d30, q12, #2 | |||
| NRND vadd.u16 q0, q0, q13 | |||
| shrn d31, q0, #2 | |||
| .if \avg | |||
| vld1.8 {q9}, [r0,:128] | |||
| vrhadd.u8 q15, q15, q9 | |||
| .endif | |||
| vaddl.u8 q9, d2, d4 | |||
| vaddl.u8 q11, d3, d5 | |||
| vst1.8 {q15}, [r0,:128], r2 | |||
| bgt 1b | |||
| vld1.8 {d0-d2}, [r1], r2 | |||
| vadd.u16 q12, q8, q9 | |||
| NRND vadd.u16 q12, q12, q13 | |||
| vext.8 q15, q0, q1, #1 | |||
| vadd.u16 q1 , q10, q11 | |||
| shrn d28, q12, #2 | |||
| NRND vadd.u16 q1, q1, q13 | |||
| shrn d29, q1, #2 | |||
| .if \avg | |||
| vld1.8 {q8}, [r0,:128] | |||
| vrhadd.u8 q14, q14, q8 | |||
| .endif | |||
| vaddl.u8 q8, d0, d30 | |||
| vaddl.u8 q10, d1, d31 | |||
| vst1.8 {q14}, [r0,:128], r2 | |||
| vadd.u16 q12, q8, q9 | |||
| NRND vadd.u16 q12, q12, q13 | |||
| vadd.u16 q0, q10, q11 | |||
| shrn d30, q12, #2 | |||
| NRND vadd.u16 q0, q0, q13 | |||
| shrn d31, q0, #2 | |||
| .if \avg | |||
| vld1.8 {q9}, [r0,:128] | |||
| vrhadd.u8 q15, q15, q9 | |||
| .endif | |||
| vst1.8 {q15}, [r0,:128], r2 | |||
| bx lr | |||
| .endm | |||
| .macro pixels8 rnd=1, avg=0 | |||
| 1: vld1.8 {d0}, [r1], r2 | |||
| vld1.8 {d1}, [r1], r2 | |||
| vld1.8 {d2}, [r1], r2 | |||
| pld [r1, r2, lsl #2] | |||
| vld1.8 {d3}, [r1], r2 | |||
| pld [r1] | |||
| pld [r1, r2] | |||
| pld [r1, r2, lsl #1] | |||
| .if \avg | |||
| vld1.8 {d4}, [r0,:64], r2 | |||
| vrhadd.u8 d0, d0, d4 | |||
| vld1.8 {d5}, [r0,:64], r2 | |||
| vrhadd.u8 d1, d1, d5 | |||
| vld1.8 {d6}, [r0,:64], r2 | |||
| vrhadd.u8 d2, d2, d6 | |||
| vld1.8 {d7}, [r0,:64], r2 | |||
| vrhadd.u8 d3, d3, d7 | |||
| sub r0, r0, r2, lsl #2 | |||
| .endif | |||
| subs r3, r3, #4 | |||
| vst1.8 {d0}, [r0,:64], r2 | |||
| vst1.8 {d1}, [r0,:64], r2 | |||
| vst1.8 {d2}, [r0,:64], r2 | |||
| vst1.8 {d3}, [r0,:64], r2 | |||
| bne 1b | |||
| bx lr | |||
| .endm | |||
| .macro pixels8_x2 rnd=1, avg=0 | |||
| 1: vld1.8 {q0}, [r1], r2 | |||
| vext.8 d1, d0, d1, #1 | |||
| vld1.8 {q1}, [r1], r2 | |||
| vext.8 d3, d2, d3, #1 | |||
| pld [r1] | |||
| pld [r1, r2] | |||
| subs r3, r3, #2 | |||
| vswp d1, d2 | |||
| avg q0, q0, q1 | |||
| .if \avg | |||
| vld1.8 {d4}, [r0,:64], r2 | |||
| vld1.8 {d5}, [r0,:64] | |||
| vrhadd.u8 q0, q0, q2 | |||
| sub r0, r0, r2 | |||
| .endif | |||
| vst1.8 {d0}, [r0,:64], r2 | |||
| vst1.8 {d1}, [r0,:64], r2 | |||
| bne 1b | |||
| bx lr | |||
| .endm | |||
| .macro pixels8_y2 rnd=1, avg=0 | |||
| sub r3, r3, #2 | |||
| vld1.8 {d0}, [r1], r2 | |||
| vld1.8 {d1}, [r1], r2 | |||
| 1: subs r3, r3, #2 | |||
| avg d4, d0, d1 | |||
| vld1.8 {d0}, [r1], r2 | |||
| avg d5, d0, d1 | |||
| vld1.8 {d1}, [r1], r2 | |||
| pld [r1] | |||
| pld [r1, r2] | |||
| .if \avg | |||
| vld1.8 {d2}, [r0,:64], r2 | |||
| vld1.8 {d3}, [r0,:64] | |||
| vrhadd.u8 q2, q2, q1 | |||
| sub r0, r0, r2 | |||
| .endif | |||
| vst1.8 {d4}, [r0,:64], r2 | |||
| vst1.8 {d5}, [r0,:64], r2 | |||
| bne 1b | |||
| avg d4, d0, d1 | |||
| vld1.8 {d0}, [r1], r2 | |||
| avg d5, d0, d1 | |||
| .if \avg | |||
| vld1.8 {d2}, [r0,:64], r2 | |||
| vld1.8 {d3}, [r0,:64] | |||
| vrhadd.u8 q2, q2, q1 | |||
| sub r0, r0, r2 | |||
| .endif | |||
| vst1.8 {d4}, [r0,:64], r2 | |||
| vst1.8 {d5}, [r0,:64], r2 | |||
| bx lr | |||
| .endm | |||
| .macro pixels8_xy2 rnd=1, avg=0 | |||
| sub r3, r3, #2 | |||
| vld1.8 {q0}, [r1], r2 | |||
| vld1.8 {q1}, [r1], r2 | |||
| NRND vmov.i16 q11, #1 | |||
| pld [r1] | |||
| pld [r1, r2] | |||
| vext.8 d4, d0, d1, #1 | |||
| vext.8 d6, d2, d3, #1 | |||
| vaddl.u8 q8, d0, d4 | |||
| vaddl.u8 q9, d2, d6 | |||
| 1: subs r3, r3, #2 | |||
| vld1.8 {q0}, [r1], r2 | |||
| pld [r1] | |||
| vadd.u16 q10, q8, q9 | |||
| vext.8 d4, d0, d1, #1 | |||
| NRND vadd.u16 q10, q10, q11 | |||
| vaddl.u8 q8, d0, d4 | |||
| shrn d5, q10, #2 | |||
| vld1.8 {q1}, [r1], r2 | |||
| vadd.u16 q10, q8, q9 | |||
| pld [r1, r2] | |||
| .if \avg | |||
| vld1.8 {d7}, [r0,:64] | |||
| vrhadd.u8 d5, d5, d7 | |||
| .endif | |||
| NRND vadd.u16 q10, q10, q11 | |||
| vst1.8 {d5}, [r0,:64], r2 | |||
| shrn d7, q10, #2 | |||
| .if \avg | |||
| vld1.8 {d5}, [r0,:64] | |||
| vrhadd.u8 d7, d7, d5 | |||
| .endif | |||
| vext.8 d6, d2, d3, #1 | |||
| vaddl.u8 q9, d2, d6 | |||
| vst1.8 {d7}, [r0,:64], r2 | |||
| bgt 1b | |||
| vld1.8 {q0}, [r1], r2 | |||
| vadd.u16 q10, q8, q9 | |||
| vext.8 d4, d0, d1, #1 | |||
| NRND vadd.u16 q10, q10, q11 | |||
| vaddl.u8 q8, d0, d4 | |||
| shrn d5, q10, #2 | |||
| vadd.u16 q10, q8, q9 | |||
| .if \avg | |||
| vld1.8 {d7}, [r0,:64] | |||
| vrhadd.u8 d5, d5, d7 | |||
| .endif | |||
| NRND vadd.u16 q10, q10, q11 | |||
| vst1.8 {d5}, [r0,:64], r2 | |||
| shrn d7, q10, #2 | |||
| .if \avg | |||
| vld1.8 {d5}, [r0,:64] | |||
| vrhadd.u8 d7, d7, d5 | |||
| .endif | |||
| vst1.8 {d7}, [r0,:64], r2 | |||
| bx lr | |||
| .endm | |||
| .macro pixfunc pfx, name, suf, rnd=1, avg=0 | |||
| .if \rnd | |||
| .macro avg rd, rn, rm | |||
| vrhadd.u8 \rd, \rn, \rm | |||
| .endm | |||
| .macro shrn rd, rn, rm | |||
| vrshrn.u16 \rd, \rn, \rm | |||
| .endm | |||
| .macro NRND insn:vararg | |||
| .endm | |||
| .else | |||
| .macro avg rd, rn, rm | |||
| vhadd.u8 \rd, \rn, \rm | |||
| .endm | |||
| .macro shrn rd, rn, rm | |||
| vshrn.u16 \rd, \rn, \rm | |||
| .endm | |||
| .macro NRND insn:vararg | |||
| \insn | |||
| .endm | |||
| .endif | |||
| function ff_\pfx\name\suf\()_neon, export=1 | |||
| \name \rnd, \avg | |||
| endfunc | |||
| .purgem avg | |||
| .purgem shrn | |||
| .purgem NRND | |||
| .endm | |||
| .macro pixfunc2 pfx, name, avg=0 | |||
| pixfunc \pfx, \name, rnd=1, avg=\avg | |||
| pixfunc \pfx, \name, _no_rnd, rnd=0, avg=\avg | |||
| .endm | |||
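pixfunc is assembly-time templating: it redefines avg, shrn and NRND according to the rnd flag, assembles the shared macro body once per setting, then purges the helpers, so each kernel above yields both a rounding and a truncating variant from a single source. NRND expands its operand only in the no_rnd instantiation, which is how the bias adds in the xy2 bodies appear only there; pixfunc2 simply instantiates both flavours. A rough C analogue of the one-body/two-instantiations pattern:

    #include <stdint.h>

    #define AVG_RND(x, y)    (((x) + (y) + 1) >> 1)   /* like vrhadd.u8 */
    #define AVG_NO_RND(x, y) (((x) + (y))     >> 1)   /* like vhadd.u8  */

    /* One body, instantiated twice with different averaging primitives. */
    #define DEF_AVG_ROW(name, AVG)                                         \
        static void name(uint8_t *dst, const uint8_t *a,                   \
                         const uint8_t *b, int n)                          \
        {                                                                  \
            for (int i = 0; i < n; i++)                                    \
                dst[i] = AVG(a[i], b[i]);                                  \
        }

    DEF_AVG_ROW(avg_row_rnd,    AVG_RND)
    DEF_AVG_ROW(avg_row_no_rnd, AVG_NO_RND)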
| function ff_put_h264_qpel16_mc00_neon, export=1 | |||
| mov r3, #16 | |||
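| @ mc00 is a plain 16x16 copy: set h = 16 and fall through into | |||
| @ ff_put_pixels16_neon below (the other *_mc00 stubs work the same way) | |||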
| endfunc | |||
| pixfunc put_, pixels16, avg=0 | |||
| pixfunc2 put_, pixels16_x2, avg=0 | |||
| pixfunc2 put_, pixels16_y2, avg=0 | |||
| pixfunc2 put_, pixels16_xy2, avg=0 | |||
| function ff_avg_h264_qpel16_mc00_neon, export=1 | |||
| mov r3, #16 | |||
| endfunc | |||
| pixfunc avg_, pixels16, avg=1 | |||
| pixfunc2 avg_, pixels16_x2, avg=1 | |||
| pixfunc2 avg_, pixels16_y2, avg=1 | |||
| pixfunc2 avg_, pixels16_xy2, avg=1 | |||
| function ff_put_h264_qpel8_mc00_neon, export=1 | |||
| mov r3, #8 | |||
| endfunc | |||
| pixfunc put_, pixels8, avg=0 | |||
| pixfunc2 put_, pixels8_x2, avg=0 | |||
| pixfunc2 put_, pixels8_y2, avg=0 | |||
| pixfunc2 put_, pixels8_xy2, avg=0 | |||
| function ff_avg_h264_qpel8_mc00_neon, export=1 | |||
| mov r3, #8 | |||
| endfunc | |||
| pixfunc avg_, pixels8, avg=1 | |||
| pixfunc avg_, pixels8_x2, avg=1 | |||
| pixfunc avg_, pixels8_y2, avg=1 | |||
| pixfunc avg_, pixels8_xy2, avg=1 | |||
| @@ -54,8 +54,8 @@ av_cold void ff_hpeldsp_init(HpelDSPContext* c, int flags) | |||
| hpel_funcs(avg_no_rnd,, 16); | |||
| if (ARCH_X86) ff_hpeldsp_init_x86 (c, flags); | |||
| if (ARCH_ARM) ff_hpeldsp_init_arm (c, flags); | |||
| #if 0 | |||
| if (HAVE_VIS) ff_hpeldsp_init_vis (c, flags); | |||
| if (ARCH_ALPHA) ff_hpeldsp_init_alpha (c, flags); | |||
| #endif | |||