(authors: Jason, Loren, Holger) to FFmpeg. Patch by Daniel Kang <daniel dot d dot kang at gmail dot com>, as part of Google's GCI 2010. Originally committed as revision 26150 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/n0.8)
| @@ -2140,6 +2140,127 @@ PRED8x8L_HORIZONTAL_UP mmxext | |||||
| PRED8x8L_HORIZONTAL_UP ssse3 | PRED8x8L_HORIZONTAL_UP ssse3 | ||||
| %endif | %endif | ||||
| ;----------------------------------------------------------------------------- | |||||
| ;void pred8x8l_horizontal_down(uint8_t *src, int has_topleft, int has_topright, int stride) | |||||
| ;----------------------------------------------------------------------------- | |||||
| ; Horizontal-down intra prediction for one 8x8 block, MMX-register version. | |||||
| ; On the C side this routine is installed as h->pred8x8l[HOR_DOWN_PRED]. | |||||
| ; Only assembled when CONFIG_GPL is defined. | |||||
| ; | |||||
| ; Registers, following the argument order of the prototype | |||||
| ; (NOTE(review): assumes the usual cglobal numbering r0..r3 = arg0..arg3, | |||||
| ; with "4,5" meaning 4 arguments / 5 general-purpose registers - confirm | |||||
| ; against x86inc): | |||||
| ;   r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride, r4 = scratch | |||||
| ; | |||||
| ; NOTE(review): PALIGNR (mapped to PALIGNR_MMX here) and PRED4x4_LOWPASS are | |||||
| ; macros defined outside this view. The comments below assume that | |||||
| ; "PALIGNR dst, src, n, tmp" yields (dst:src) >> n bytes like the SSSE3 | |||||
| ; palignr instruction, and that PRED4x4_LOWPASS leaves a 3-tap smoothed | |||||
| ; result in its first argument - confirm both against their definitions. | |||||
| %ifdef CONFIG_GPL | |||||
| INIT_MMX | |||||
| %define PALIGNR PALIGNR_MMX | |||||
| cglobal pred8x8l_horizontal_down_mmxext, 4,5 | |||||
| ; r0 = src - stride (the row directly above the block); r4 = src + stride. | |||||
| sub r0, r3 | |||||
| lea r4, [r0+r3*2] | |||||
| ; Gather the byte at column -1 of the row above and of rows 0..6 into mm3. | |||||
| ; Each qword is loaded from [row-8], so the wanted pixel is its top byte; | |||||
| ; the punpckh* chain packs them with row 6 in the lowest byte and the | |||||
| ; top-left pixel (row -1) in the highest byte. | |||||
| movq mm0, [r0+r3*1-8] | |||||
| punpckhbw mm0, [r0+r3*0-8] | |||||
| movq mm1, [r4+r3*1-8] | |||||
| punpckhbw mm1, [r0+r3*2-8] | |||||
| ; Remember src - stride in r4; r0 now walks down the block. | |||||
| mov r4, r0 | |||||
| punpckhwd mm1, mm0 | |||||
| lea r0, [r0+r3*4] | |||||
| movq mm2, [r0+r3*1-8] | |||||
| punpckhbw mm2, [r0+r3*0-8] | |||||
| lea r0, [r0+r3*2] | |||||
| movq mm3, [r0+r3*1-8] | |||||
| punpckhbw mm3, [r0+r3*0-8] | |||||
| punpckhwd mm3, mm2 | |||||
| punpckhdq mm3, mm1 | |||||
| ; r0 = row 7: mm0 = qword ending at column -1 of row 7. | |||||
| lea r0, [r0+r3*2] | |||||
| movq mm0, [r0+r3*0-8] | |||||
| ; mm1 = the 8 pixels of the row above the block; then restore r0 = src - stride. | |||||
| movq mm1, [r4] | |||||
| mov r0, r4 | |||||
| ; Build the two one-byte-shifted neighbours of the left column: | |||||
| ; mm4 is shifted towards row 7, mm1 towards the top row (see PALIGNR note). | |||||
| movq mm4, mm3 | |||||
| movq mm2, mm3 | |||||
| PALIGNR mm4, mm0, 7, mm0 | |||||
| PALIGNR mm1, mm2, 1, mm2 | |||||
| ; has_topleft != 0: nothing to patch, go straight to filtering. | |||||
| test r1, r1 | |||||
| jnz .do_left | |||||
| ; No top-left neighbour: mm1.byte6 ^= mm3.byte7 ^ mm4.byte7. | |||||
| ; If mm1.byte6 currently equals mm3.byte7 (the top-left pixel, which holds | |||||
| ; under the PALIGNR assumption above) this replaces it with the top byte of mm4. | |||||
| .fix_lt_1: | |||||
| movq mm5, mm3 | |||||
| pxor mm5, mm4 | |||||
| psrlq mm5, 56 | |||||
| psllq mm5, 48 | |||||
| pxor mm1, mm5 | |||||
| jmp .do_left | |||||
| ; Entered from .do_left when has_topleft == 0: overwrite the low byte of mm2 | |||||
| ; with the low byte of mm3 (first pixel of the top row loaded from [r0]). | |||||
| .fix_lt_2: | |||||
| movq mm5, mm3 | |||||
| pxor mm5, mm2 | |||||
| psllq mm5, 56 | |||||
| psrlq mm5, 56 | |||||
| pxor mm2, mm5 | |||||
| ; has_topright != 0: top-right data is usable as loaded. | |||||
| test r2, r2 | |||||
| jnz .do_top | |||||
| ; has_topright == 0: overwrite the top byte of mm1 with the top byte of mm3 | |||||
| ; (last pixel of the top row). | |||||
| .fix_tr_1: | |||||
| movq mm5, mm3 | |||||
| pxor mm5, mm1 | |||||
| psrlq mm5, 56 | |||||
| psllq mm5, 56 | |||||
| pxor mm1, mm5 | |||||
| jmp .do_top | |||||
| .do_left: | |||||
| ; Filter the left column; mm4 is saved in mm0 and restored around the macro. | |||||
| ; The result (mm2) is kept in both mm7 and mm6. | |||||
| movq mm0, mm4 | |||||
| PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 | |||||
| movq mm4, mm0 | |||||
| movq mm7, mm2 | |||||
| movq mm6, mm2 | |||||
| ; Second filter pass; only the low byte of its result survives the shift | |||||
| ; by 56 and is merged into mm7 through PALIGNR. | |||||
| PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 | |||||
| psllq mm1, 56 | |||||
| PALIGNR mm7, mm1, 7, mm3 | |||||
| ; Load the top row (mm3) plus the 8 bytes before it (mm0) and after it (mm1), | |||||
| ; then derive the one-byte-shifted neighbours mm2 and mm1 from them. | |||||
| movq mm0, [r0-8] | |||||
| movq mm3, [r0] | |||||
| movq mm1, [r0+8] | |||||
| movq mm2, mm3 | |||||
| movq mm4, mm3 | |||||
| PALIGNR mm2, mm0, 7, mm0 | |||||
| PALIGNR mm1, mm4, 1, mm4 | |||||
| ; Patch the shifted top-row neighbours when a corner neighbour is flagged absent. | |||||
| test r1, r1 | |||||
| jz .fix_lt_2 | |||||
| test r2, r2 | |||||
| jz .fix_tr_1 | |||||
| .do_top: | |||||
| ; Filter the top row. From here on the flags are no longer needed, so r1, r2 | |||||
| ; and r4 are reused as row pointers: r1 = r0 + 2*stride, r2 = r0 + 4*stride, | |||||
| ; r4 = r0 + 6*stride (r0 is still src - stride). | |||||
| PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 | |||||
| movq mm5, mm4 | |||||
| lea r1, [r0+r3*2] | |||||
| psllq mm7, 56 | |||||
| movq mm2, mm5 | |||||
| movq mm3, mm6 | |||||
| movq mm4, mm2 | |||||
| PALIGNR mm2, mm6, 7, mm5 | |||||
| PALIGNR mm6, mm7, 7, mm0 | |||||
| lea r2, [r1+r3*2] | |||||
| PALIGNR mm4, mm3, 1, mm7 | |||||
| movq mm5, mm3 | |||||
| ; mm3 = byte-wise rounded average of mm3 and mm6. | |||||
| pavgb mm3, mm6 | |||||
| PRED4x4_LOWPASS mm0, mm4, mm6, mm5, mm7 | |||||
| movq mm4, mm2 | |||||
| movq mm1, mm2 | |||||
| lea r4, [r2+r3*2] | |||||
| psrlq mm4, 16 | |||||
| psrlq mm1, 8 | |||||
| PRED4x4_LOWPASS mm6, mm4, mm2, mm1, mm5 | |||||
| ; Interleave the averaged bytes (mm3) with the filtered bytes (mm0): | |||||
| ; low half stays in mm3, high half goes to mm7 (copied to mm1, mm0, mm4). | |||||
| movq mm7, mm3 | |||||
| punpcklbw mm3, mm0 | |||||
| punpckhbw mm7, mm0 | |||||
| movq mm1, mm7 | |||||
| movq mm0, mm7 | |||||
| movq mm4, mm7 | |||||
| ; Rows 7..4: row 7 is mm3 itself, each row above it is taken two bytes | |||||
| ; further along the mm7:mm3 pair (PALIGNR by 2, 4, 6). | |||||
| movq [r4+r3*2], mm3 | |||||
| PALIGNR mm7, mm3, 2, mm5 | |||||
| movq [r4+r3*1], mm7 | |||||
| PALIGNR mm1, mm3, 4, mm5 | |||||
| movq [r2+r3*2], mm1 | |||||
| PALIGNR mm0, mm3, 6, mm3 | |||||
| movq [r2+r3*1], mm0 | |||||
| ; Rows 3..0: row 3 is mm4 (the saved high half), rows 2, 1, 0 are taken | |||||
| ; two bytes further each along the mm6:mm4 pair. | |||||
| movq mm2, mm6 | |||||
| movq mm3, mm6 | |||||
| movq [r1+r3*2], mm4 | |||||
| PALIGNR mm6, mm4, 2, mm5 | |||||
| movq [r1+r3*1], mm6 | |||||
| PALIGNR mm2, mm4, 4, mm5 | |||||
| movq [r0+r3*2], mm2 | |||||
| PALIGNR mm3, mm4, 6, mm4 | |||||
| movq [r0+r3*1], mm3 | |||||
| RET | |||||
| %endif | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride) | ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride) | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| @@ -79,6 +79,7 @@ void ff_pred8x8l_vertical_left_sse2(uint8_t *src, int has_topleft, int has_topri | |||||
| void ff_pred8x8l_vertical_left_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); | void ff_pred8x8l_vertical_left_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); | ||||
| void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride); | void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride); | ||||
| void ff_pred8x8l_horizontal_up_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); | void ff_pred8x8l_horizontal_up_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); | ||||
| void ff_pred8x8l_horizontal_down_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride); | |||||
| void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride); | void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride); | ||||
| void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride); | void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride); | ||||
| void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride); | void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride); | ||||
| @@ -124,6 +125,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) | |||||
| h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_mmxext; | h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_mmxext; | ||||
| h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_mmxext; | h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_mmxext; | ||||
| h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_mmxext; | h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_mmxext; | ||||
| h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_mmxext; | |||||
| #endif | #endif | ||||
| h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext; | h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext; | ||||
| #if CONFIG_GPL | #if CONFIG_GPL | ||||