|
@@ -81,8 +81,8 @@ function ff_pred16x16_dc_neon, export=1 |
|
|
.L_pred16x16_dc_end: |
|
|
.L_pred16x16_dc_end: |
|
|
mov w3, #8 |
|
|
mov w3, #8 |
|
|
6: st1 {v0.16b}, [x0], x1 |
|
|
6: st1 {v0.16b}, [x0], x1 |
|
|
st1 {v0.16b}, [x0], x1 |
|
|
|
|
|
subs w3, w3, #1 |
|
|
subs w3, w3, #1 |
|
|
|
|
|
st1 {v0.16b}, [x0], x1 |
|
|
b.ne 6b |
|
|
b.ne 6b |
|
|
ret |
|
|
ret |
|
|
endfunc |
|
|
endfunc |
|
@@ -91,8 +91,8 @@ function ff_pred16x16_hor_neon, export=1 |
|
|
sub x2, x0, #1 |
|
|
sub x2, x0, #1 |
|
|
mov w3, #16 |
|
|
mov w3, #16 |
|
|
1: ld1r {v0.16b}, [x2], x1 |
|
|
1: ld1r {v0.16b}, [x2], x1 |
|
|
st1 {v0.16b}, [x0], x1 |
|
|
|
|
|
subs w3, w3, #1 |
|
|
subs w3, w3, #1 |
|
|
|
|
|
st1 {v0.16b}, [x0], x1 |
|
|
b.ne 1b |
|
|
b.ne 1b |
|
|
ret |
|
|
ret |
|
|
endfunc |
|
|
endfunc |
|
@@ -102,9 +102,9 @@ function ff_pred16x16_vert_neon, export=1 |
|
|
add x1, x1, x1 |
|
|
add x1, x1, x1 |
|
|
ld1 {v0.16b}, [x2], x1 |
|
|
ld1 {v0.16b}, [x2], x1 |
|
|
mov w3, #8 |
|
|
mov w3, #8 |
|
|
1: st1 {v0.16b}, [x0], x1 |
|
|
|
|
|
|
|
|
1: subs w3, w3, #1 |
|
|
|
|
|
st1 {v0.16b}, [x0], x1 |
|
|
st1 {v0.16b}, [x2], x1 |
|
|
st1 {v0.16b}, [x2], x1 |
|
|
subs w3, w3, #1 |
|
|
|
|
|
b.ne 1b |
|
|
b.ne 1b |
|
|
ret |
|
|
ret |
|
|
endfunc |
|
|
endfunc |
|
@@ -158,8 +158,8 @@ function ff_pred16x16_plane_neon, export=1 |
|
|
add v1.8h, v1.8h, v2.8h |
|
|
add v1.8h, v1.8h, v2.8h |
|
|
sqshrun2 v0.16b, v1.8h, #5 |
|
|
sqshrun2 v0.16b, v1.8h, #5 |
|
|
add v1.8h, v1.8h, v3.8h |
|
|
add v1.8h, v1.8h, v3.8h |
|
|
st1 {v0.16b}, [x0], x1 |
|
|
|
|
|
subs w3, w3, #1 |
|
|
subs w3, w3, #1 |
|
|
|
|
|
st1 {v0.16b}, [x0], x1 |
|
|
b.ne 1b |
|
|
b.ne 1b |
|
|
ret |
|
|
ret |
|
|
endfunc |
|
|
endfunc |
|
@@ -175,8 +175,8 @@ function ff_pred8x8_hor_neon, export=1 |
|
|
sub x2, x0, #1 |
|
|
sub x2, x0, #1 |
|
|
mov w3, #8 |
|
|
mov w3, #8 |
|
|
1: ld1r {v0.8b}, [x2], x1 |
|
|
1: ld1r {v0.8b}, [x2], x1 |
|
|
st1 {v0.8b}, [x0], x1 |
|
|
|
|
|
subs w3, w3, #1 |
|
|
subs w3, w3, #1 |
|
|
|
|
|
st1 {v0.8b}, [x0], x1 |
|
|
b.ne 1b |
|
|
b.ne 1b |
|
|
ret |
|
|
ret |
|
|
endfunc |
|
|
endfunc |
|
@@ -186,9 +186,9 @@ function ff_pred8x8_vert_neon, export=1 |
|
|
lsl x1, x1, #1 |
|
|
lsl x1, x1, #1 |
|
|
ld1 {v0.8b}, [x2], x1 |
|
|
ld1 {v0.8b}, [x2], x1 |
|
|
mov w3, #4 |
|
|
mov w3, #4 |
|
|
1: st1 {v0.8b}, [x0], x1 |
|
|
|
|
|
|
|
|
1: subs w3, w3, #1 |
|
|
|
|
|
st1 {v0.8b}, [x0], x1 |
|
|
st1 {v0.8b}, [x2], x1 |
|
|
st1 {v0.8b}, [x2], x1 |
|
|
subs w3, w3, #1 |
|
|
|
|
|
b.ne 1b |
|
|
b.ne 1b |
|
|
ret |
|
|
ret |
|
|
endfunc |
|
|
endfunc |
|
@@ -232,9 +232,9 @@ function ff_pred8x8_plane_neon, export=1 |
|
|
mov w3, #8 |
|
|
mov w3, #8 |
|
|
1: |
|
|
1: |
|
|
sqshrun v0.8b, v1.8h, #5 |
|
|
sqshrun v0.8b, v1.8h, #5 |
|
|
|
|
|
subs w3, w3, #1 |
|
|
add v1.8h, v1.8h, v2.8h |
|
|
add v1.8h, v1.8h, v2.8h |
|
|
st1 {v0.8b}, [x0], x1 |
|
|
st1 {v0.8b}, [x0], x1 |
|
|
subs w3, w3, #1 |
|
|
|
|
|
b.ne 1b |
|
|
b.ne 1b |
|
|
ret |
|
|
ret |
|
|
endfunc |
|
|
endfunc |
|
@@ -290,9 +290,9 @@ function ff_pred8x8_dc_neon, export=1 |
|
|
.L_pred8x8_dc_end: |
|
|
.L_pred8x8_dc_end: |
|
|
mov w3, #4 |
|
|
mov w3, #4 |
|
|
add x2, x0, x1, lsl #2 |
|
|
add x2, x0, x1, lsl #2 |
|
|
6: st1 {v0.8b}, [x0], x1 |
|
|
|
|
|
|
|
|
6: subs w3, w3, #1 |
|
|
|
|
|
st1 {v0.8b}, [x0], x1 |
|
|
st1 {v1.8b}, [x2], x1 |
|
|
st1 {v1.8b}, [x2], x1 |
|
|
subs w3, w3, #1 |
|
|
|
|
|
b.ne 6b |
|
|
b.ne 6b |
|
|
ret |
|
|
ret |
|
|
endfunc |
|
|
endfunc |
|
|