@@ -95,6 +95,7 @@ endfunc
.endm

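@ pixels16_y2: vertical half-pel interpolation for a 16-byte-wide block;
@ each output row is the byte-wise average of two vertically adjacent
@ source rows. Register roles assumed from the surrounding functions:
@ r0 = dst, r1 = src, r2 = line stride, r3 = height. "avg" (and "shrn"
@ in the xy2 macros) are presumably assembler-time aliases defined
@ elsewhere in this file: vrhadd.u8/vrshrn for the rounding variant
@ (\rnd=1), vhadd.u8/vshrn for the no-rounding one. \avg=1 selects the
@ averaging functions, which fold the result into the bytes already at dst.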
.macro  pixels16_y2     rnd=1, avg=0
        sub             r3,  r3,  #2
        vld1.64         {q0},     [r1], r2
        vld1.64         {q1},     [r1], r2
1:      subs            r3,  r3,  #2
@@ -114,10 +115,25 @@ endfunc
        vst1.64         {q2},     [r0,:128], r2
        vst1.64         {q3},     [r0,:128], r2
        bne             1b

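@ Last two rows, peeled out of the loop: only one further source row is
@ loaded here, so the macro reads exactly height+1 rows and never
@ touches the line below the block.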
        avg             q2,  q0,  q1
        vld1.64         {q0},     [r1], r2
        avg             q3,  q0,  q1
  .if \avg
        vld1.8          {q8},     [r0,:128], r2
        vld1.8          {q9},     [r0,:128]
        vrhadd.u8       q2,  q2,  q8
        vrhadd.u8       q3,  q3,  q9
        sub             r0,  r0,  r2
  .endif
        vst1.64         {q2},     [r0,:128], r2
        vst1.64         {q3},     [r0,:128], r2

        bx              lr
.endm

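@ pixels16_xy2: two-dimensional half-pel interpolation; every output
@ pixel averages a 2x2 source neighbourhood. Rows are widened with
@ vaddl.u8, the horizontally shifted copy comes from vext.8, and the
@ 16-bit sums are narrowed back with a right shift by 2 (shrn).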
.macro  pixels16_xy2    rnd=1, avg=0
        sub             r3,  r3,  #2
        vld1.64         {d0-d2},  [r1], r2
        vld1.64         {d4-d6},  [r1], r2
  .ifeq \rnd
@@ -173,6 +189,42 @@ endfunc
        vaddl.u8        q11, d3,  d5
        vst1.64         {q15},    [r0,:128], r2
        bgt             1b

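@ Last two rows, peeled so the row below the block is never loaded.
@ d28/d29 and d30/d31 are the halves of q14/q15 holding the two output
@ rows. q13 presumably holds #1, set in the .ifeq \rnd block above this
@ hunk: the no-rounding variant truncates on the shift, so the +1 bias
@ yields (a+b+c+d+1)>>2 instead of the rounded (a+b+c+d+2)>>2.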
        vld1.64         {d0-d2},  [r1], r2
        vadd.u16        q12, q8,  q9
  .ifeq \rnd
        vadd.u16        q12, q12, q13
  .endif
        vext.8          q15, q0,  q1,  #1
        vadd.u16        q1,  q10, q11
        shrn            d28, q12, #2
  .ifeq \rnd
        vadd.u16        q1,  q1,  q13
  .endif
        shrn            d29, q1,  #2
  .if \avg
        vld1.8          {q8},     [r0,:128]
        vrhadd.u8       q14, q14, q8
  .endif
        vaddl.u8        q8,  d0,  d30
        vaddl.u8        q10, d1,  d31
        vst1.64         {q14},    [r0,:128], r2
        vadd.u16        q12, q8,  q9
  .ifeq \rnd
        vadd.u16        q12, q12, q13
  .endif
        vadd.u16        q0,  q10, q11
        shrn            d30, q12, #2
  .ifeq \rnd
        vadd.u16        q0,  q0,  q13
  .endif
        shrn            d31, q0,  #2
  .if \avg
        vld1.8          {q9},     [r0,:128]
        vrhadd.u8       q15, q15, q9
  .endif
        vst1.64         {q15},    [r0,:128], r2

        bx              lr
.endm

@@ -228,6 +280,7 @@ endfunc
.endm

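@ pixels8_y2: as pixels16_y2, but for 8-byte-wide blocks held in
@ d registers.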
.macro  pixels8_y2      rnd=1, avg=0
        sub             r3,  r3,  #2
        vld1.64         {d0},     [r1], r2
        vld1.64         {d1},     [r1], r2
1:      subs            r3,  r3,  #2
@@ -246,10 +299,24 @@ endfunc
        vst1.64         {d4},     [r0,:64], r2
        vst1.64         {d5},     [r0,:64], r2
        bne             1b

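@ Last two rows outside the loop, again saving one source load. d4/d5
@ are the halves of q2 and d2/d3 the halves of q1, so a single q-sized
@ vrhadd.u8 averages both output rows with dst at once.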
        avg             d4,  d0,  d1
        vld1.64         {d0},     [r1], r2
        avg             d5,  d0,  d1
  .if \avg
        vld1.8          {d2},     [r0,:64], r2
        vld1.8          {d3},     [r0,:64]
        vrhadd.u8       q2,  q2,  q1
        sub             r0,  r0,  r2
  .endif
        vst1.64         {d4},     [r0,:64], r2
        vst1.64         {d5},     [r0,:64], r2

        bx              lr
.endm

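@ pixels8_xy2: as pixels16_xy2, but 8 bytes wide; one q register holds a
@ whole source row plus the extra column needed for the x interpolation.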
.macro  pixels8_xy2     rnd=1, avg=0
        sub             r3,  r3,  #2
        vld1.64         {q0},     [r1], r2
        vld1.64         {q1},     [r1], r2
  .ifeq \rnd
@@ -291,6 +358,31 @@ endfunc
        vaddl.u8        q9,  d2,  d6
        vst1.64         {d7},     [r0,:64], r2
        bgt             1b

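@ Last two rows, peeled to avoid loading the row below the block. Note
@ the reuse of d5/d7 as scratch for the \avg variant: each is reloaded
@ from dst only after the previous store has consumed it.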
        vld1.64         {q0},     [r1], r2
        vadd.u16        q10, q8,  q9
        vext.8          d4,  d0,  d1,  #1
  .ifeq \rnd
        vadd.u16        q10, q10, q11
  .endif
        vaddl.u8        q8,  d0,  d4
        shrn            d5,  q10, #2
        vadd.u16        q10, q8,  q9
  .if \avg
        vld1.8          {d7},     [r0,:64]
        vrhadd.u8       d5,  d5,  d7
  .endif
  .ifeq \rnd
        vadd.u16        q10, q10, q11
  .endif
        vst1.64         {d5},     [r0,:64], r2
        shrn            d7,  q10, #2
  .if \avg
        vld1.8          {d5},     [r0,:64]
        vrhadd.u8       d7,  d7,  d5
  .endif
        vst1.64         {d7},     [r0,:64], r2

        bx              lr
.endm