|
|
@@ -1229,18 +1229,22 @@ cglobal vp8_luma_dc_wht_mmx, 2,3 |
|
|
movd [%7+%9*2], m%4 |
|
|
movd [%7+%9*2], m%4 |
|
|
%endmacro |
|
|
%endmacro |
|
|
|
|
|
|
|
|
%macro SPLATB_REG 3 |
|
|
|
|
|
|
|
|
%macro SPLATB_REG 3-4 |
|
|
movd %1, %2 |
|
|
movd %1, %2 |
|
|
|
|
|
%ifidn %3, ssse3 |
|
|
|
|
|
pshufb %1, %4 |
|
|
|
|
|
%else |
|
|
punpcklbw %1, %1 |
|
|
punpcklbw %1, %1 |
|
|
%if mmsize == 16 ; sse2 |
|
|
%if mmsize == 16 ; sse2 |
|
|
punpcklwd %1, %1 |
|
|
|
|
|
pshufd %1, %1, 0x0 |
|
|
|
|
|
|
|
|
pshuflw %1, %1, 0x0 |
|
|
|
|
|
punpcklqdq %1, %1 |
|
|
%elifidn %3, mmx |
|
|
%elifidn %3, mmx |
|
|
punpcklwd %1, %1 |
|
|
punpcklwd %1, %1 |
|
|
punpckldq %1, %1 |
|
|
punpckldq %1, %1 |
|
|
%else ; mmxext |
|
|
%else ; mmxext |
|
|
pshufw %1, %1, 0x0 |
|
|
pshufw %1, %1, 0x0 |
|
|
%endif |
|
|
%endif |
|
|
|
|
|
%endif |
|
|
%endmacro |
|
|
%endmacro |
|
|
|
|
|
|
|
|
%macro SIMPLE_LOOPFILTER 3 |
|
|
%macro SIMPLE_LOOPFILTER 3 |
|
|
@@ -1252,7 +1256,10 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3 |
|
|
%if mmsize == 8 ; mmx/mmxext |
|
|
%if mmsize == 8 ; mmx/mmxext |
|
|
mov r3, 2 |
|
|
mov r3, 2 |
|
|
%endif |
|
|
%endif |
|
|
SPLATB_REG m7, r2, %1 ; splat "flim" into register |
|
|
|
|
|
|
|
|
%ifidn %1, ssse3 |
|
|
|
|
|
pxor m0, m0 |
|
|
|
|
|
%endif |
|
|
|
|
|
SPLATB_REG m7, r2, %1, m0 ; splat "flim" into register |
|
|
|
|
|
|
|
|
; set up indexes to address 4 rows |
|
|
; set up indexes to address 4 rows |
|
|
mov r2, r1 |
|
|
mov r2, r1 |
|
|
@@ -1398,6 +1405,8 @@ SIMPLE_LOOPFILTER mmxext, h, 6 |
|
|
INIT_XMM |
|
|
INIT_XMM |
|
|
SIMPLE_LOOPFILTER sse2, v, 3 |
|
|
SIMPLE_LOOPFILTER sse2, v, 3 |
|
|
SIMPLE_LOOPFILTER sse2, h, 6 |
|
|
SIMPLE_LOOPFILTER sse2, h, 6 |
|
|
|
|
|
SIMPLE_LOOPFILTER ssse3, v, 3 |
|
|
|
|
|
SIMPLE_LOOPFILTER ssse3, h, 6 |
|
|
|
|
|
|
|
|
;----------------------------------------------------------------------------- |
|
|
;----------------------------------------------------------------------------- |
|
|
; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride, |
|
|
; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride, |
|
|
@@ -1433,11 +1442,15 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 |
|
|
%define stack_reg hev_thr_reg |
|
|
%define stack_reg hev_thr_reg |
|
|
%endif |
|
|
%endif |
|
|
|
|
|
|
|
|
|
|
|
%ifidn %1, ssse3 |
|
|
|
|
|
pxor m7, m7 |
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
|
%ifndef m8 ; mmx/mmxext or sse2 on x86-32 |
|
|
%ifndef m8 ; mmx/mmxext or sse2 on x86-32 |
|
|
; splat function arguments |
|
|
; splat function arguments |
|
|
SPLATB_REG m0, E_reg, %1 ; E |
|
|
|
|
|
SPLATB_REG m1, I_reg, %1 ; I |
|
|
|
|
|
SPLATB_REG m2, hev_thr_reg, %1 ; hev_thresh |
|
|
|
|
|
|
|
|
SPLATB_REG m0, E_reg, %1, m7 ; E |
|
|
|
|
|
SPLATB_REG m1, I_reg, %1, m7 ; I |
|
|
|
|
|
SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh |
|
|
|
|
|
|
|
|
; align stack |
|
|
; align stack |
|
|
mov stack_reg, rsp ; backup stack pointer |
|
|
mov stack_reg, rsp ; backup stack pointer |
|
|
@@ -1470,9 +1483,9 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 |
|
|
%define q0backup m8 |
|
|
%define q0backup m8 |
|
|
|
|
|
|
|
|
; splat function arguments |
|
|
; splat function arguments |
|
|
SPLATB_REG flim_E, E_reg, %1 ; E |
|
|
|
|
|
SPLATB_REG flim_I, I_reg, %1 ; I |
|
|
|
|
|
SPLATB_REG hev_thr, hev_thr_reg, %1 ; hev_thresh |
|
|
|
|
|
|
|
|
SPLATB_REG flim_E, E_reg, %1, m7 ; E |
|
|
|
|
|
SPLATB_REG flim_I, I_reg, %1, m7 ; I |
|
|
|
|
|
SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh |
|
|
%endif |
|
|
%endif |
|
|
|
|
|
|
|
|
%if mmsize == 8 && %4 == 16 ; mmx/mmxext |
|
|
%if mmsize == 8 && %4 == 16 ; mmx/mmxext |
|
|
@@ -1884,15 +1897,15 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 |
|
|
%endmacro |
|
|
%endmacro |
|
|
|
|
|
|
|
|
INIT_MMX |
|
|
INIT_MMX |
|
|
INNER_LOOPFILTER mmx, v, 6, 16, 8 |
|
|
|
|
|
INNER_LOOPFILTER mmx, h, 6, 16, 8 |
|
|
|
|
|
INNER_LOOPFILTER mmxext, v, 6, 16, 8 |
|
|
|
|
|
INNER_LOOPFILTER mmxext, h, 6, 16, 8 |
|
|
|
|
|
|
|
|
INNER_LOOPFILTER mmx, v, 6, 16, 0 |
|
|
|
|
|
INNER_LOOPFILTER mmx, h, 6, 16, 0 |
|
|
|
|
|
INNER_LOOPFILTER mmxext, v, 6, 16, 0 |
|
|
|
|
|
INNER_LOOPFILTER mmxext, h, 6, 16, 0 |
|
|
|
|
|
|
|
|
INNER_LOOPFILTER mmx, v, 6, 8, 8 |
|
|
|
|
|
INNER_LOOPFILTER mmx, h, 6, 8, 8 |
|
|
|
|
|
INNER_LOOPFILTER mmxext, v, 6, 8, 8 |
|
|
|
|
|
INNER_LOOPFILTER mmxext, h, 6, 8, 8 |
|
|
|
|
|
|
|
|
INNER_LOOPFILTER mmx, v, 6, 8, 0 |
|
|
|
|
|
INNER_LOOPFILTER mmx, h, 6, 8, 0 |
|
|
|
|
|
INNER_LOOPFILTER mmxext, v, 6, 8, 0 |
|
|
|
|
|
INNER_LOOPFILTER mmxext, h, 6, 8, 0 |
|
|
|
|
|
|
|
|
INIT_XMM |
|
|
INIT_XMM |
|
|
INNER_LOOPFILTER sse2, v, 5, 16, 13 |
|
|
INNER_LOOPFILTER sse2, v, 5, 16, 13 |
|
|
@@ -1904,6 +1917,15 @@ INNER_LOOPFILTER sse2, h, 6, 16, 13 |
|
|
INNER_LOOPFILTER sse2, v, 6, 8, 13 |
|
|
INNER_LOOPFILTER sse2, v, 6, 8, 13 |
|
|
INNER_LOOPFILTER sse2, h, 6, 8, 13 |
|
|
INNER_LOOPFILTER sse2, h, 6, 8, 13 |
|
|
|
|
|
|
|
|
|
|
|
INNER_LOOPFILTER ssse3, v, 5, 16, 13 |
|
|
|
|
|
%ifdef m8 |
|
|
|
|
|
INNER_LOOPFILTER ssse3, h, 5, 16, 13 |
|
|
|
|
|
%else |
|
|
|
|
|
INNER_LOOPFILTER ssse3, h, 6, 16, 13 |
|
|
|
|
|
%endif |
|
|
|
|
|
INNER_LOOPFILTER ssse3, v, 6, 8, 13 |
|
|
|
|
|
INNER_LOOPFILTER ssse3, h, 6, 8, 13 |
|
|
|
|
|
|
|
|
;----------------------------------------------------------------------------- |
|
|
;----------------------------------------------------------------------------- |
|
|
; void vp8_h/v_loop_filter<size>_mbedge_<opt>(uint8_t *dst, [uint8_t *v,] int stride, |
|
|
; void vp8_h/v_loop_filter<size>_mbedge_<opt>(uint8_t *dst, [uint8_t *v,] int stride, |
|
|
; int flimE, int flimI, int hev_thr); |
|
|
; int flimE, int flimI, int hev_thr); |
|
|
@@ -1984,11 +2006,15 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 |
|
|
%define stack_reg hev_thr_reg |
|
|
%define stack_reg hev_thr_reg |
|
|
%endif |
|
|
%endif |
|
|
|
|
|
|
|
|
|
|
|
%ifidn %1, ssse3 |
|
|
|
|
|
pxor m7, m7 |
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
|
%ifndef m8 ; mmx/mmxext or sse2 on x86-32 |
|
|
%ifndef m8 ; mmx/mmxext or sse2 on x86-32 |
|
|
; splat function arguments |
|
|
; splat function arguments |
|
|
SPLATB_REG m0, E_reg, %1 ; E |
|
|
|
|
|
SPLATB_REG m1, I_reg, %1 ; I |
|
|
|
|
|
SPLATB_REG m2, hev_thr_reg, %1 ; hev_thresh |
|
|
|
|
|
|
|
|
SPLATB_REG m0, E_reg, %1, m7 ; E |
|
|
|
|
|
SPLATB_REG m1, I_reg, %1, m7 ; I |
|
|
|
|
|
SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh |
|
|
|
|
|
|
|
|
; align stack |
|
|
; align stack |
|
|
mov stack_reg, rsp ; backup stack pointer |
|
|
mov stack_reg, rsp ; backup stack pointer |
|
|
@@ -2028,9 +2054,9 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 |
|
|
%define lim_sign m15 |
|
|
%define lim_sign m15 |
|
|
|
|
|
|
|
|
; splat function arguments |
|
|
; splat function arguments |
|
|
SPLATB_REG flim_E, E_reg, %1 ; E |
|
|
|
|
|
SPLATB_REG flim_I, I_reg, %1 ; I |
|
|
|
|
|
SPLATB_REG hev_thr, hev_thr_reg, %1 ; hev_thresh |
|
|
|
|
|
|
|
|
SPLATB_REG flim_E, E_reg, %1, m7 ; E |
|
|
|
|
|
SPLATB_REG flim_I, I_reg, %1, m7 ; I |
|
|
|
|
|
SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh |
|
|
%endif |
|
|
%endif |
|
|
|
|
|
|
|
|
%if mmsize == 8 && %4 == 16 ; mmx/mmxext |
|
|
%if mmsize == 8 && %4 == 16 ; mmx/mmxext |
|
|
@@ -2521,15 +2547,15 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 |
|
|
%endmacro |
|
|
%endmacro |
|
|
|
|
|
|
|
|
INIT_MMX |
|
|
INIT_MMX |
|
|
MBEDGE_LOOPFILTER mmx, v, 6, 16, 8 |
|
|
|
|
|
MBEDGE_LOOPFILTER mmx, h, 6, 16, 8 |
|
|
|
|
|
MBEDGE_LOOPFILTER mmxext, v, 6, 16, 8 |
|
|
|
|
|
MBEDGE_LOOPFILTER mmxext, h, 6, 16, 8 |
|
|
|
|
|
|
|
|
MBEDGE_LOOPFILTER mmx, v, 6, 16, 0 |
|
|
|
|
|
MBEDGE_LOOPFILTER mmx, h, 6, 16, 0 |
|
|
|
|
|
MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0 |
|
|
|
|
|
MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0 |
|
|
|
|
|
|
|
|
MBEDGE_LOOPFILTER mmx, v, 6, 8, 8 |
|
|
|
|
|
MBEDGE_LOOPFILTER mmx, h, 6, 8, 8 |
|
|
|
|
|
MBEDGE_LOOPFILTER mmxext, v, 6, 8, 8 |
|
|
|
|
|
MBEDGE_LOOPFILTER mmxext, h, 6, 8, 8 |
|
|
|
|
|
|
|
|
MBEDGE_LOOPFILTER mmx, v, 6, 8, 0 |
|
|
|
|
|
MBEDGE_LOOPFILTER mmx, h, 6, 8, 0 |
|
|
|
|
|
MBEDGE_LOOPFILTER mmxext, v, 6, 8, 0 |
|
|
|
|
|
MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0 |
|
|
|
|
|
|
|
|
INIT_XMM |
|
|
INIT_XMM |
|
|
MBEDGE_LOOPFILTER sse2, v, 5, 16, 16 |
|
|
MBEDGE_LOOPFILTER sse2, v, 5, 16, 16 |
|
|
@@ -2540,3 +2566,12 @@ MBEDGE_LOOPFILTER sse2, h, 6, 16, 16 |
|
|
%endif |
|
|
%endif |
|
|
MBEDGE_LOOPFILTER sse2, v, 6, 8, 16 |
|
|
MBEDGE_LOOPFILTER sse2, v, 6, 8, 16 |
|
|
MBEDGE_LOOPFILTER sse2, h, 6, 8, 16 |
|
|
MBEDGE_LOOPFILTER sse2, h, 6, 8, 16 |
|
|
|
|
|
|
|
|
|
|
|
MBEDGE_LOOPFILTER ssse3, v, 5, 16, 16 |
|
|
|
|
|
%ifdef m8 |
|
|
|
|
|
MBEDGE_LOOPFILTER ssse3, h, 5, 16, 16 |
|
|
|
|
|
%else |
|
|
|
|
|
MBEDGE_LOOPFILTER ssse3, h, 6, 16, 16 |
|
|
|
|
|
%endif |
|
|
|
|
|
MBEDGE_LOOPFILTER ssse3, v, 6, 8, 16 |
|
|
|
|
|
MBEDGE_LOOPFILTER ssse3, h, 6, 8, 16 |