Original authors: Holger Lubitz <holger lubitz org>, Jason Garrett-Glaser <darkshikari gmail com> (approves LGPL relicensing for this code) and Loren Merritt <lorenm at u dot washington dot edu> (approves LGPL relicensing for this code). Patch by Daniel Kang <daniel dot d dot kang at gmail com>, as part of Google's GCI 2010. Originally committed as revision 26138 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags: n0.8)
| @@ -43,6 +43,7 @@ cextern pb_1 | |||
| cextern pb_3 | |||
| cextern pw_4 | |||
| cextern pw_5 | |||
| cextern pw_8 | |||
| cextern pw_16 | |||
| cextern pw_17 | |||
| cextern pw_32 | |||
| @@ -1138,6 +1139,110 @@ PRED8x8L_TOP_DC mmxext | |||
| PRED8x8L_TOP_DC ssse3 | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ;void pred8x8l_dc(uint8_t *src, int has_topleft, int has_topright, int stride) | |||
| ;----------------------------------------------------------------------------- | |||
| %ifdef CONFIG_GPL | |||
| ; PRED8x8L_DC cpu_suffix | |||
| ; Emits pred8x8l_dc_<cpu_suffix>, which fills the 8x8 block at src with one DC byte: | |||
| ; (sum of the 8 bytes of the filtered left edge + sum of the 8 bytes of the filtered top edge + 8) >> 4. | |||
| ; Registers, in prototype argument order: r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride. | |||
| ; r4 is scratch; r1 and r2 are reused as row pointers once the flags have been tested. | |||
| ; PALIGNR must be %defined before the macro is instantiated. PALIGNR and PRED4x4_LOWPASS are | |||
| ; defined outside this section; the byte layouts noted below assume PALIGNR dst, src, n, tmp | |||
| ; behaves like the palignr instruction, and PRED4x4_LOWPASS is presumably the 3-tap edge | |||
| ; smoothing filter - confirm against their definitions. | |||
| %macro PRED8x8L_DC 1 | |||
| cglobal pred8x8l_dc_%1, 4,5 | |||
| ; r0 -> row above the block. | |||
| sub r0, r3 | |||
| ; Gather the left neighbours. Each movq reads the 8 bytes that end just left of a row, so the | |||
| ; neighbour pixel is the top byte; the punpckh* steps merge those top bytes together. | |||
| lea r4, [r0+r3*2] | |||
| movq mm0, [r0+r3*1-8] | |||
| punpckhbw mm0, [r0+r3*0-8] | |||
| movq mm1, [r4+r3*1-8] | |||
| punpckhbw mm1, [r0+r3*2-8] | |||
| ; Keep the pointer to the row above in r4 while r0 walks down the block. | |||
| mov r4, r0 | |||
| punpckhwd mm1, mm0 | |||
| lea r0, [r0+r3*4] | |||
| movq mm2, [r0+r3*1-8] | |||
| punpckhbw mm2, [r0+r3*0-8] | |||
| lea r0, [r0+r3*2] | |||
| movq mm3, [r0+r3*1-8] | |||
| punpckhbw mm3, [r0+r3*0-8] | |||
| punpckhwd mm3, mm2 | |||
| ; mm3 bytes 0..7 = left pixels of rows 6..0 followed by the top-left pixel. | |||
| punpckhdq mm3, mm1 | |||
| lea r0, [r0+r3*2] | |||
| ; Top byte of mm0 = left pixel of row 7; mm1 = the 8 pixels directly above the block. | |||
| movq mm0, [r0+r3*0-8] | |||
| movq mm1, [r4] | |||
| ; Restore r0 to the row above the block. | |||
| mov r0, r4 | |||
| movq mm4, mm3 | |||
| movq mm2, mm3 | |||
| ; mm4 = left column shifted to include row 7; mm1 = left column shifted to include the first top pixel. | |||
| PALIGNR mm4, mm0, 7, mm0 | |||
| PALIGNR mm1, mm2, 1, mm2 | |||
| test r1, r1 | |||
| jnz .do_left | |||
| ; has_topleft == 0: xor trick that overwrites byte 6 of mm1 (the top-left pixel) | |||
| ; with the top byte of mm4 (left pixel of row 0). | |||
| .fix_lt_1: | |||
| movq mm5, mm3 | |||
| pxor mm5, mm4 | |||
| psrlq mm5, 56 | |||
| psllq mm5, 48 | |||
| pxor mm1, mm5 | |||
| jmp .do_left | |||
| ; has_topleft == 0 (top-edge pass): overwrite byte 0 of mm2 with byte 0 of mm3, | |||
| ; i.e. substitute the first top pixel for the missing top-left pixel. | |||
| .fix_lt_2: | |||
| movq mm5, mm3 | |||
| pxor mm5, mm2 | |||
| psllq mm5, 56 | |||
| psrlq mm5, 56 | |||
| pxor mm2, mm5 | |||
| test r2, r2 | |||
| jnz .body | |||
| ; has_topright == 0: overwrite byte 7 of mm1 with byte 7 of mm3, | |||
| ; i.e. substitute the last top pixel for the missing top-right pixel. | |||
| .fix_tr_1: | |||
| movq mm5, mm3 | |||
| pxor mm5, mm1 | |||
| psrlq mm5, 56 | |||
| psllq mm5, 56 | |||
| pxor mm1, mm5 | |||
| jmp .body | |||
| ; Filter the left edge: two PRED4x4_LOWPASS passes whose results are combined into mm7. | |||
| .do_left: | |||
| movq mm0, mm4 | |||
| PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 | |||
| movq mm4, mm0 | |||
| movq mm7, mm2 | |||
| PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 | |||
| psllq mm1, 56 | |||
| PALIGNR mm7, mm1, 7, mm3 | |||
| ; Load the row above the block together with the 8 bytes on either side of it, then build | |||
| ; the one-byte-shifted copies (mm2, mm1) around the centre copy in mm3. | |||
| movq mm0, [r0-8] | |||
| movq mm3, [r0] | |||
| movq mm1, [r0+8] | |||
| movq mm2, mm3 | |||
| movq mm4, mm3 | |||
| PALIGNR mm2, mm0, 7, mm0 | |||
| PALIGNR mm1, mm4, 1, mm4 | |||
| test r1, r1 | |||
| jz .fix_lt_2 | |||
| test r2, r2 | |||
| jz .fix_tr_1 | |||
| ; Label now carries a colon like every other label in this macro | |||
| ; (a colon-less label alone on a line triggers NASM's orphan-labels warning). | |||
| .body: | |||
| ; From here on r1, r2 and r4 are row pointers, each two rows below the previous one. | |||
| lea r1, [r0+r3*2] | |||
| ; mm6 = filtered top edge. | |||
| PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 | |||
| pxor mm0, mm0 | |||
| pxor mm1, mm1 | |||
| lea r2, [r1+r3*2] | |||
| ; psadbw against zero sums the 8 bytes of each filtered edge. | |||
| psadbw mm0, mm7 | |||
| psadbw mm1, mm6 | |||
| ; Add the rounding constant, combine both sums and divide by 16. | |||
| paddw mm0, [pw_8] | |||
| paddw mm0, mm1 | |||
| lea r4, [r2+r3*2] | |||
| psrlw mm0, 4 | |||
| ; Broadcast the DC word to all four words, then pack it into 8 identical bytes. | |||
| pshufw mm0, mm0, 0 | |||
| packuswb mm0, mm0 | |||
| ; Store the DC row to all 8 rows of the block. | |||
| movq [r0+r3*1], mm0 | |||
| movq [r0+r3*2], mm0 | |||
| movq [r1+r3*1], mm0 | |||
| movq [r1+r3*2], mm0 | |||
| movq [r2+r3*1], mm0 | |||
| movq [r2+r3*2], mm0 | |||
| movq [r4+r3*1], mm0 | |||
| movq [r4+r3*2], mm0 | |||
| RET | |||
| %endmacro | |||
| ; Instantiate the macro twice, differing only in which PALIGNR implementation is selected. | |||
| INIT_MMX | |||
| %define PALIGNR PALIGNR_MMX | |||
| PRED8x8L_DC mmxext | |||
| %define PALIGNR PALIGNR_SSSE3 | |||
| PRED8x8L_DC ssse3 | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride) | |||
| ;----------------------------------------------------------------------------- | |||
| @@ -61,6 +61,8 @@ void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride); | |||
| void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride); | |||
| void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride); | |||
| void ff_pred8x8l_top_dc_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride); | |||
| void ff_pred8x8l_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride); /* installed as pred8x8l[DC_PRED] by ff_h264_pred_init_x86 under CONFIG_GPL; defined outside this file (presumably the PRED8x8L_DC mmxext assembly instantiation) */ | |||
| void ff_pred8x8l_dc_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride); /* SSSE3 counterpart, installed as pred8x8l[DC_PRED] in the ssse3 section of ff_h264_pred_init_x86 */ | |||
| void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride); | |||
| void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride); | |||
| void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride); | |||
| @@ -100,6 +102,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) | |||
| h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext; | |||
| #if CONFIG_GPL | |||
| h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext; | |||
| h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_mmxext; | |||
| #endif | |||
| h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext; | |||
| #if CONFIG_GPL | |||
| @@ -155,6 +158,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) | |||
| h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3; | |||
| #if CONFIG_GPL | |||
| h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3; | |||
| h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_ssse3; | |||
| #endif | |||
| if (codec_id == CODEC_ID_VP8) { | |||
| h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3; | |||