Originally committed as revision 20696 to svn://svn.ffmpeg.org/ffmpeg/trunk
tags/v0.6
| @@ -125,9 +125,11 @@ void ff_avg_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int); | |||
/*
 * H.264 chroma motion-compensation routines implemented in NEON assembly,
 * in "put" and "avg" variants for the mc8, mc4 and mc2 sizes.
 * NOTE(review): judging by the mc2 assembly, the six arguments are
 * (dst, src, stride, h, x, y): dst is written, src is read, stride is added
 * to both pointers per row, h counts rows, and x/y select the blend weights.
 * Confirm that mc8 and mc4 follow the same convention.
 */
| void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); | |||
| void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); | |||
| void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); | |||
| void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); | |||
| void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); | |||
| void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); | |||
/* Loop-filter prototype; its implementation is not part of this excerpt. */
| void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, | |||
| int beta, int8_t *tc0); | |||
| @@ -272,9 +274,11 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) | |||
| if (CONFIG_H264_DECODER) { | |||
| c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon; | |||
| c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon; | |||
| c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon; | |||
| c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon; | |||
| c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon; | |||
| c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon; | |||
| c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon; | |||
| c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon; | |||
| @@ -320,6 +320,74 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1 | |||
| .endfunc | |||
| .endm | |||
/*
 * h264_chroma_mc2 type
 *
 * Emits the exported function ff_<type>_h264_chroma_mc2_neon, where type is
 * "put" or "avg".  Each loop iteration produces two output rows of 2 bytes.
 * With x and y denoting the two stack arguments, the weights are
 *   A = (8-x)*(8-y)   B = x*(8-y)   C = (8-x)*y   D = x*y
 * and every output byte is
 *   (A*s[0] + B*s[1] + C*s[stride] + D*s[stride+1] + 32) >> 6
 * The avg variant then stores the rounded average of that value and the
 * byte already present at dst.  When x and y are both zero the blend is
 * skipped and the source bytes are used directly.
 *
 * Register use, as read from the code below:
 *   r0  destination pointer (written, advanced by r2 per row)
 *   r1  source pointer (read, advanced by r2 per row)
 *   r2  byte offset added to both pointers per row
 *   r3  remaining row count, decremented by 2 per iteration
 *   [sp, #16] and [sp, #20] after the push: x and y
 * NOTE(review): x and y are presumably 1/8-pel fractional offsets in 0..7
 * and r3 is presumably even and positive -- confirm against the callers.
 * dst accesses carry a 16-bit alignment qualifier (:16).
 */
| .macro h264_chroma_mc2 type | |||
| function ff_\type\()_h264_chroma_mc2_neon, export=1 | |||
/* Four registers are pushed (16 bytes), so the stack arguments start at sp+16. */
| push {r4-r6, lr} | |||
/* r4 = x, lr = y */
| ldr r4, [sp, #16] | |||
| ldr lr, [sp, #20] | |||
/* Preload hints for the first two source rows. */
| pld [r1] | |||
| pld [r1, r2] | |||
/* x == 0 and y == 0: take the no-interpolation path at label 2. */
| orrs r5, r4, lr | |||
| beq 2f | |||
/* r5 = D = x*y */
| mul r5, r4, lr | |||
/* r6 = C = 8*y - x*y */
| rsb r6, r5, lr, lsl #3 | |||
/* r12 = B = 8*x - x*y */
| rsb r12, r5, r4, lsl #3 | |||
/* r4 = A = x*y - 8*x - 8*y + 64 */
| sub r4, r5, r4, lsl #3 | |||
| sub r4, r4, lr, lsl #3 | |||
| add r4, r4, #64 | |||
/* Broadcast each weight to every byte: d0 = A, d2 = B, d1 = C, d3 = D. */
| vdup.8 d0, r4 | |||
| vdup.8 d2, r12 | |||
| vdup.8 d1, r6 | |||
| vdup.8 d3, r5 | |||
/* Interleave 16-bit lanes: d0 = A A B B A A B B, d1 = C C D D C C D D (bytes). */
| vtrn.16 q0, q1 | |||
| 1: | |||
/* d4 = [row0, row1] (4 bytes each); r1 ends up pointing at row 2. */
| vld1.32 {d4[0]}, [r1], r2 | |||
| vld1.32 {d4[1]}, [r1], r2 | |||
/* d5 = [row1, row2]; row 2 is loaded without writeback, so it is read again as row 0 of the next iteration. */
| vrev64.32 d5, d4 | |||
| vld1.32 {d5[1]}, [r1] | |||
/* q3 = q2 rotated by one byte, i.e. each row shifted left by one pixel. */
| vext.8 q3, q2, q2, #1 | |||
/* After the transpose each row contributes the byte pairs (p0,p1),(p1,p2): d4 covers row0/row1, d5 covers row1/row2. */
| vtrn.16 q2, q3 | |||
/* q8 = upper-row bytes * {A,B} + lower-row bytes * {C,D}, as 16-bit products. */
| vmull.u8 q8, d4, d0 | |||
| vmlal.u8 q8, d5, d1 | |||
| .ifc \type,avg | |||
/* avg only: fetch the two existing dst rows into d18, then rewind r0 to the first row. */
| vld1.16 {d18[0]}, [r0,:16], r2 | |||
| vld1.16 {d18[1]}, [r0,:16] | |||
| sub r0, r0, r2 | |||
| .endif | |||
/* Gather the A/C partial sums in d16 and the B/D partial sums in d17, then add them. */
| vtrn.32 d16, d17 | |||
| vadd.i16 d16, d16, d17 | |||
/* Rounding shift right by 6 with narrowing; the four results are the low 4 bytes of d16. */
| vrshrn.u16 d16, q8, #6 | |||
| .ifc \type,avg | |||
/* Rounding halving add with the existing dst bytes: (a + b + 1) >> 1. */
| vrhadd.u8 d16, d16, d18 | |||
| .endif | |||
/* Store 2 bytes per row for two rows, advancing dst by r2 each time. */
| vst1.16 {d16[0]}, [r0,:16], r2 | |||
| vst1.16 {d16[1]}, [r0,:16], r2 | |||
/* Two rows done; loop while rows remain. */
| subs r3, r3, #2 | |||
| bgt 1b | |||
| pop {r4-r6, pc} | |||
/* x == 0 and y == 0: no filtering needed. */
| 2: | |||
| .ifc \type,put | |||
/* put: copy 2 bytes per row for two rows using core registers. */
| ldrh r5, [r1], r2 | |||
| strh r5, [r0], r2 | |||
| ldrh r6, [r1], r2 | |||
| strh r6, [r0], r2 | |||
| .else | |||
/* avg: load two source rows and two dst rows, average with rounding, store back. */
| vld1.16 {d16[0]}, [r1], r2 | |||
| vld1.16 {d16[1]}, [r1], r2 | |||
| vld1.16 {d18[0]}, [r0,:16], r2 | |||
| vld1.16 {d18[1]}, [r0,:16] | |||
| sub r0, r0, r2 | |||
| vrhadd.u8 d16, d16, d18 | |||
| vst1.16 {d16[0]}, [r0,:16], r2 | |||
| vst1.16 {d16[1]}, [r0,:16], r2 | |||
| .endif | |||
| subs r3, r3, #2 | |||
| bgt 2b | |||
| pop {r4-r6, pc} | |||
| .endfunc | |||
| .endm | |||
| .text | |||
| .align | |||
| @@ -327,6 +395,8 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1 | |||
/*
 * Expand the chroma MC macros.  The two h264_chroma_mc2 lines emit
 * ff_put_h264_chroma_mc2_neon and ff_avg_h264_chroma_mc2_neon; the mc4
 * lines do the same for the mc4 functions.
 */
| h264_chroma_mc8 avg | |||
| h264_chroma_mc4 put | |||
| h264_chroma_mc4 avg | |||
| h264_chroma_mc2 put | |||
| h264_chroma_mc2 avg | |||
| /* H.264 loop filter */ | |||