(authors: Jason, Loren, Holger) to FFmpeg. Patch by Daniel Kang <daniel dot d dot kang at gmail com>, as part of Google's GCI 2010. Originally committed as revision 26149 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/n0.8)
@@ -2045,6 +2045,101 @@ INIT_MMX | |||||
PRED8x8L_VERTICAL_LEFT ssse3 | PRED8x8L_VERTICAL_LEFT ssse3 | ||||
%endif | %endif | ||||
;----------------------------------------------------------------------------- | |||||
; void pred8x8l_horizontal_up(uint8_t *src, int has_topleft, int has_topright, int stride) | |||||
;----------------------------------------------------------------------------- | |||||
; The macro and both instantiations below are assembled only when CONFIG_GPL
; is defined.
%ifdef CONFIG_GPL | |||||
; PRED8x8L_HORIZONTAL_UP <suffix>
;   Emits the function pred8x8l_horizontal_up_<suffix> (declared via cglobal
;   with 4 arguments and 4 general-purpose registers).
;
;   Registers, following the prototype in the header comment
;   (assumes the usual x86inc mapping of arguments to r0..r3 - TODO confirm):
;     r0 = src, r1 = has_topleft, r3 = stride.
;     r2 (has_topright) is overwritten before it is ever read, so that
;     argument is unused by this routine.
;
;   Notation used in the comments: byte lists are written most-significant
;   byte first (same convention as the pre-existing "l7 l6 ..." comments).
;     lN = pixel immediately left of row N of the block
;     lt = pixel above-left of the block, tN = pixel above column N
;
;   PALIGNR and PRED4x4_LOWPASS are macros defined outside this view.
;   Assumed semantics (TODO confirm against their definitions):
;     PALIGNR dst, src, n, tmp      -> dst = (dst:src) >> (8*n), tmp = scratch
;     PRED4x4_LOWPASS d, a, b, c, t -> d = (a + 2*c + b + 2) >> 2 per byte,
;                                      a, b and t may be clobbered
;
;   NOTE(review): the loads from the row above the block (src - stride) are
;   performed unconditionally, before has_topleft is tested - confirm callers
;   guarantee that memory is readable.
%macro PRED8x8L_HORIZONTAL_UP 1 | |||||
cglobal pred8x8l_horizontal_up_%1, 4,4 | |||||
; r0 = src - stride (row above the block); r2 = src + stride (row 1).
sub r0, r3 | |||||
lea r2, [r0+r3*2] | |||||
; Each movq loads the 8 bytes ending just left of a row, so the top byte of
; the register is that row's left neighbour. punpckhbw/punpckhwd/punpckhdq
; then gather those top bytes into a single register.
; mm0 top word: lt l0 (row 0 interleaved with the row above).
movq mm0, [r0+r3*1-8] | |||||
punpckhbw mm0, [r0+r3*0-8] | |||||
; mm1 top word: l1 l2 (row 2 interleaved with row 1).
movq mm1, [r2+r3*1-8] | |||||
punpckhbw mm1, [r0+r3*2-8] | |||||
; Keep src - stride in r2 while r0 walks down the block.
mov r2, r0 | |||||
; mm1 top dword: lt l0 l1 l2.
punpckhwd mm1, mm0 | |||||
; r0 -> row 3; mm2 top word: l3 l4.
lea r0, [r0+r3*4] | |||||
movq mm2, [r0+r3*1-8] | |||||
punpckhbw mm2, [r0+r3*0-8] | |||||
; r0 -> row 5; mm3 top word: l5 l6.
lea r0, [r0+r3*2] | |||||
movq mm3, [r0+r3*1-8] | |||||
punpckhbw mm3, [r0+r3*0-8] | |||||
; mm3 = lt l0 l1 l2 l3 l4 l5 l6.
punpckhwd mm3, mm2 | |||||
punpckhdq mm3, mm1 | |||||
; r0 -> row 7; top byte of mm0 = l7.
lea r0, [r0+r3*2] | |||||
movq mm0, [r0+r3*0-8] | |||||
; mm1 = the 8 pixels directly above the block (t7 .. t0).
movq mm1, [r2] | |||||
; Restore r0 = src - stride for the stores at the end.
mov r0, r2 | |||||
movq mm4, mm3 | |||||
movq mm2, mm3 | |||||
; mm4 = l0 l1 l2 l3 l4 l5 l6 l7 (left column shifted down by one pixel).
PALIGNR mm4, mm0, 7, mm0 | |||||
; mm1 = t0 lt l0 l1 l2 l3 l4 l5 (left column shifted up by one pixel).
PALIGNR mm1, mm2, 1, mm2 | |||||
; If has_topleft is non-zero the top-left pixel is used as loaded.
test r1, r1 | |||||
jnz .do_left | |||||
; No top-left neighbour: replace the lt byte in mm1 with l0.
; (lt ^ l0) is isolated from the top bytes of mm3/mm4, moved to the byte
; position holding lt in mm1, and XORed in: lt ^ (lt ^ l0) = l0.
.fix_lt_1: | |||||
movq mm5, mm3 | |||||
pxor mm5, mm4 | |||||
psrlq mm5, 56 | |||||
psllq mm5, 48 | |||||
pxor mm1, mm5 | |||||
.do_left: | |||||
; Low-pass filter the left column. mm4 is saved in mm0 around the first
; filter call and restored afterwards (it is passed as a clobberable operand).
movq mm0, mm4 | |||||
; mm2 = filtered version of mm3 (lt l0 .. l6) using neighbours mm1 and mm4.
PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 | |||||
movq mm4, mm0 | |||||
movq mm7, mm2 | |||||
; Second filter pass over l0 .. l7; only its least-significant byte
; (the filtered l7) is kept by the shift below.
PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 | |||||
psllq mm1, 56 | |||||
; mm7 = filtered l0 l1 l2 l3 l4 l5 l6 l7 (filtered lt is shifted out).
PALIGNR mm7, mm1, 7, mm3 | |||||
; r1 = address of row 1 (r0 is src - stride here).
lea r1, [r0+r3*2] | |||||
; Reverse the byte order of mm7: pshufw reverses the four words, then the
; psllw/psrlw/por sequence swaps the two bytes inside each word.
pshufw mm0, mm7, 00011011b ; l6 l7 l4 l5 l2 l3 l0 l1 | |||||
psllq mm7, 56 ; l7 .. .. .. .. .. .. .. | |||||
movq mm2, mm0 | |||||
psllw mm0, 8 | |||||
psrlw mm2, 8 | |||||
por mm2, mm0 ; l7 l6 l5 l4 l3 l2 l1 l0 | |||||
movq mm3, mm2 | |||||
movq mm4, mm2 | |||||
movq mm5, mm2 | |||||
; Build the column shifted by one and by two pixels, padding with l7.
psrlq mm2, 8 | |||||
psrlq mm3, 16 | |||||
; r2 = address of row 3.
lea r2, [r1+r3*2] | |||||
por mm2, mm7 ; l7 l7 l6 l5 l4 l3 l2 l1 | |||||
; Duplicate l7 into the top two bytes of mm7 (the other bytes are zero).
punpckhbw mm7, mm7 | |||||
por mm3, mm7 ; l7 l7 l7 l6 l5 l4 l3 l2 | |||||
; mm4 = rounded 2-tap average of each pixel with the next one down.
pavgb mm4, mm2 | |||||
; mm1 = 3-tap filter over each pixel and the next two down.
PRED4x4_LOWPASS mm1, mm3, mm5, mm2, mm6 | |||||
movq mm5, mm4 | |||||
; Interleave the 2-tap and 3-tap results into 16 prediction bytes:
; mm4 holds the low 8, mm5 the high 8.
punpcklbw mm4, mm1 ; p4 p3 p2 p1 | |||||
punpckhbw mm5, mm1 ; p8 p7 p6 p5 | |||||
movq mm6, mm5 | |||||
movq mm7, mm5 | |||||
movq mm0, mm5 | |||||
; Each output row is the 16-byte sequence advanced by one more word (2 bytes).
; Rows 1-3: PALIGNR across mm5:mm4 by 2, 4 and 6 bytes (row 0 is mm4 itself).
; Rows 5-7: pshufw on the high half, replicating its top word downwards
; (row 4 is the high half unchanged, kept in mm0).
PALIGNR mm5, mm4, 2, mm1 | |||||
pshufw mm1, mm6, 11111001b | |||||
PALIGNR mm6, mm4, 4, mm2 | |||||
pshufw mm2, mm7, 11111110b | |||||
PALIGNR mm7, mm4, 6, mm3 | |||||
pshufw mm3, mm0, 11111111b | |||||
; Store the eight rows; r0/r1/r2 each address two consecutive rows.
movq [r0+r3*1], mm4 | |||||
movq [r0+r3*2], mm5 | |||||
; r0 = address of row 5, used for the last two stores.
lea r0, [r2+r3*2] | |||||
movq [r1+r3*1], mm6 | |||||
movq [r1+r3*2], mm7 | |||||
movq [r2+r3*1], mm0 | |||||
movq [r2+r3*2], mm1 | |||||
movq [r0+r3*1], mm2 | |||||
movq [r0+r3*2], mm3 | |||||
RET | |||||
%endmacro | |||||
; Instantiate the routine twice; the two builds differ only in which
; PALIGNR implementation macro is selected before expansion.
INIT_MMX | |||||
%define PALIGNR PALIGNR_MMX | |||||
PRED8x8L_HORIZONTAL_UP mmxext | |||||
%define PALIGNR PALIGNR_SSSE3 | |||||
PRED8x8L_HORIZONTAL_UP ssse3 | |||||
%endif | |||||
;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride) | ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride) | ||||
;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
@@ -77,6 +77,8 @@ void ff_pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topr | |||||
void ff_pred8x8l_vertical_right_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); | void ff_pred8x8l_vertical_right_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); | ||||
void ff_pred8x8l_vertical_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride); | void ff_pred8x8l_vertical_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride); | ||||
void ff_pred8x8l_vertical_left_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); | void ff_pred8x8l_vertical_left_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); | ||||
void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride); | |||||
void ff_pred8x8l_horizontal_up_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); | |||||
void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride); | void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride); | ||||
void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride); | void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride); | ||||
void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride); | void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride); | ||||
@@ -121,6 +123,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) | |||||
h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_mmxext; | h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_mmxext; | ||||
h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_mmxext; | h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_mmxext; | ||||
h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_mmxext; | h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_mmxext; | ||||
h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_mmxext; | |||||
#endif | #endif | ||||
h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext; | h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext; | ||||
#if CONFIG_GPL | #if CONFIG_GPL | ||||
@@ -189,6 +192,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) | |||||
h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_ssse3; | h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_ssse3; | ||||
h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_ssse3; | h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_ssse3; | ||||
h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_ssse3; | h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_ssse3; | ||||
h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_ssse3; | |||||
#endif | #endif | ||||
if (codec_id == CODEC_ID_VP8) { | if (codec_id == CODEC_ID_VP8) { | ||||
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3; | h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3; | ||||