|
|
@@ -61,37 +61,37 @@ endconst |
|
|
|
br x10 |
|
|
|
.endm |
|
|
|
|
|
|
|
.macro smull1 a b c |
|
|
|
.macro smull1 a, b, c |
|
|
|
smull \a, \b, \c |
|
|
|
.endm |
|
|
|
|
|
|
|
.macro smlal1 a b c |
|
|
|
.macro smlal1 a, b, c |
|
|
|
smlal \a, \b, \c |
|
|
|
.endm |
|
|
|
|
|
|
|
.macro smlsl1 a b c |
|
|
|
.macro smlsl1 a, b, c |
|
|
|
smlsl \a, \b, \c |
|
|
|
.endm |
|
|
|
|
|
|
|
.macro idct_col4_top y1 y2 y3 y4 i l |
|
|
|
smull\i v7.4S, \y3\().\l, z2 |
|
|
|
smull\i v16.4S, \y3\().\l, z6 |
|
|
|
smull\i v17.4S, \y2\().\l, z1 |
|
|
|
.macro idct_col4_top y1, y2, y3, y4, i, l |
|
|
|
smull\i v7.4S, \y3\l, z1 |
|
|
|
smull\i v16.4S, \y3\l, z6 |
|
|
|
smull\i v17.4S, \y2\l, z1 |
|
|
|
add v19.4S, v23.4S, v7.4S |
|
|
|
smull\i v18.4S, \y2\().\l, z3 |
|
|
|
smull\i v18.4S, \y2\l, z3 |
|
|
|
add v20.4S, v23.4S, v16.4S |
|
|
|
smull\i v5.4S, \y2\().\l, z5 |
|
|
|
smull\i v5.4S, \y2\l, z5 |
|
|
|
sub v21.4S, v23.4S, v16.4S |
|
|
|
smull\i v6.4S, \y2\().\l, z7 |
|
|
|
smull\i v6.4S, \y2\l, z7 |
|
|
|
sub v22.4S, v23.4S, v7.4S |
|
|
|
|
|
|
|
smlal\i v17.4S, \y4\().\l, z3 |
|
|
|
smlsl\i v18.4S, \y4\().\l, z7 |
|
|
|
smlsl\i v5.4S, \y4\().\l, z1 |
|
|
|
smlsl\i v6.4S, \y4\().\l, z5 |
|
|
|
smlal\i v17.4S, \y4\l, z3 |
|
|
|
smlsl\i v18.4S, \y4\l, z7 |
|
|
|
smlsl\i v5.4S, \y4\l, z1 |
|
|
|
smlsl\i v6.4S, \y4\l, z5 |
|
|
|
.endm |
|
|
|
|
|
|
|
.macro idct_row4_neon y1 y2 y3 y4 pass |
|
|
|
.macro idct_row4_neon y1, y2, y3, y4, pass |
|
|
|
ld1 {\y1\().2D-\y2\().2D}, [x2], #32 |
|
|
|
movi v23.4S, #1<<2, lsl #8 |
|
|
|
orr v5.16B, \y1\().16B, \y2\().16B |
|
|
@@ -101,7 +101,7 @@ endconst |
|
|
|
mov x3, v5.D[1] |
|
|
|
smlal v23.4S, \y1\().4H, z4 |
|
|
|
|
|
|
|
idct_col4_top \y1 \y2 \y3 \y4 1 4H |
|
|
|
idct_col4_top \y1, \y2, \y3, \y4, 1, .4H |
|
|
|
|
|
|
|
cmp x3, #0 |
|
|
|
beq \pass\()f |
|
|
@@ -153,7 +153,7 @@ endconst |
|
|
|
trn2 \y4\().4S, v17.4S, v19.4S |
|
|
|
.endm |
|
|
|
|
|
|
|
.macro declare_idct_col4_neon i l |
|
|
|
.macro declare_idct_col4_neon i, l |
|
|
|
function idct_col4_neon\i |
|
|
|
dup v23.4H, z4c |
|
|
|
.if \i == 1 |
|
|
@@ -164,14 +164,14 @@ function idct_col4_neon\i |
|
|
|
.endif |
|
|
|
smull v23.4S, v23.4H, z4 |
|
|
|
|
|
|
|
idct_col4_top v24 v25 v26 v27 \i \l |
|
|
|
idct_col4_top v24, v25, v26, v27, \i, \l |
|
|
|
|
|
|
|
mov x4, v28.D[\i - 1] |
|
|
|
mov x5, v29.D[\i - 1] |
|
|
|
cmp x4, #0 |
|
|
|
beq 1f |
|
|
|
|
|
|
|
smull\i v7.4S, v28.\l, z4 |
|
|
|
smull\i v7.4S, v28\l, z4 |
|
|
|
add v19.4S, v19.4S, v7.4S |
|
|
|
sub v20.4S, v20.4S, v7.4S |
|
|
|
sub v21.4S, v21.4S, v7.4S |
|
|
@@ -181,17 +181,17 @@ function idct_col4_neon\i |
|
|
|
cmp x5, #0 |
|
|
|
beq 2f |
|
|
|
|
|
|
|
smlal\i v17.4S, v29.\l, z5 |
|
|
|
smlsl\i v18.4S, v29.\l, z1 |
|
|
|
smlal\i v5.4S, v29.\l, z7 |
|
|
|
smlal\i v6.4S, v29.\l, z3 |
|
|
|
smlal\i v17.4S, v29\l, z5 |
|
|
|
smlsl\i v18.4S, v29\l, z1 |
|
|
|
smlal\i v5.4S, v29\l, z7 |
|
|
|
smlal\i v6.4S, v29\l, z3 |
|
|
|
|
|
|
|
2: mov x5, v31.D[\i - 1] |
|
|
|
cmp x4, #0 |
|
|
|
beq 3f |
|
|
|
|
|
|
|
smull\i v7.4S, v30.\l, z6 |
|
|
|
smull\i v16.4S, v30.\l, z2 |
|
|
|
smull\i v7.4S, v30\l, z6 |
|
|
|
smull\i v16.4S, v30\l, z2 |
|
|
|
add v19.4S, v19.4S, v7.4S |
|
|
|
sub v22.4S, v22.4S, v7.4S |
|
|
|
sub v20.4S, v20.4S, v16.4S |
|
|
@@ -200,10 +200,10 @@ function idct_col4_neon\i |
|
|
|
3: cmp x5, #0 |
|
|
|
beq 4f |
|
|
|
|
|
|
|
smlal\i v17.4S, v31.\l, z7 |
|
|
|
smlsl\i v18.4S, v31.\l, z5 |
|
|
|
smlal\i v5.4S, v31.\l, z3 |
|
|
|
smlsl\i v6.4S, v31.\l, z1 |
|
|
|
smlal\i v17.4S, v31\l, z7 |
|
|
|
smlsl\i v18.4S, v31\l, z5 |
|
|
|
smlal\i v5.4S, v31\l, z3 |
|
|
|
smlsl\i v6.4S, v31\l, z1 |
|
|
|
|
|
|
|
4: addhn v7.4H, v19.4S, v17.4S |
|
|
|
addhn2 v7.8H, v20.4S, v18.4S |
|
|
@@ -219,14 +219,14 @@ function idct_col4_neon\i |
|
|
|
endfunc |
|
|
|
.endm |
|
|
|
|
|
|
|
declare_idct_col4_neon 1 4H |
|
|
|
declare_idct_col4_neon 2 8H |
|
|
|
declare_idct_col4_neon 1, .4H |
|
|
|
declare_idct_col4_neon 2, .8H |
|
|
|
|
|
|
|
function ff_simple_idct_put_neon, export=1 |
|
|
|
idct_start x2 |
|
|
|
|
|
|
|
idct_row4_neon v24 v25 v26 v27 1 |
|
|
|
idct_row4_neon v28 v29 v30 v31 2 |
|
|
|
idct_row4_neon v24, v25, v26, v27, 1 |
|
|
|
idct_row4_neon v28, v29, v30, v31, 2 |
|
|
|
bl idct_col4_neon1 |
|
|
|
|
|
|
|
sqshrun v1.8B, v7.8H, #COL_SHIFT-16 |
|
|
@@ -263,8 +263,8 @@ endfunc |
|
|
|
function ff_simple_idct_add_neon, export=1 |
|
|
|
idct_start x2 |
|
|
|
|
|
|
|
idct_row4_neon v24 v25 v26 v27 1 |
|
|
|
idct_row4_neon v28 v29 v30 v31 2 |
|
|
|
idct_row4_neon v24, v25, v26, v27, 1 |
|
|
|
idct_row4_neon v28, v29, v30, v31, 2 |
|
|
|
bl idct_col4_neon1 |
|
|
|
|
|
|
|
sshr v1.8H, V7.8H, #COL_SHIFT-16 |
|
|
@@ -328,8 +328,8 @@ function ff_simple_idct_neon, export=1 |
|
|
|
idct_start x0 |
|
|
|
|
|
|
|
mov x2, x0 |
|
|
|
idct_row4_neon v24 v25 v26 v27 1 |
|
|
|
idct_row4_neon v28 v29 v30 v31 2 |
|
|
|
idct_row4_neon v24, v25, v26, v27, 1 |
|
|
|
idct_row4_neon v28, v29, v30, v31, 2 |
|
|
|
add x2, x2, #-128 |
|
|
|
bl idct_col4_neon1 |
|
|
|
|
|
|
|