The standard syntax requires both transfer registers to be written explicitly in LDRD/STRD instructions. Some versions of the GNU assembler accept the first register alone and treat the second as implicit; others are stricter and reject that form.

Signed-off-by: Mans Rullgard <mans@mansr.com>

tags/n1.0
| @@ -24,7 +24,7 @@ | |||
| .macro h264_chroma_mc8 type, codec=h264 | |||
| function ff_\type\()_\codec\()_chroma_mc8_neon, export=1 | |||
| push {r4-r7, lr} | |||
| ldrd r4, [sp, #20] | |||
| ldrd r4, r5, [sp, #20] | |||
| .ifc \type,avg | |||
| mov lr, r0 | |||
| .endif | |||
| @@ -182,7 +182,7 @@ endfunc | |||
| .macro h264_chroma_mc4 type, codec=h264 | |||
| function ff_\type\()_\codec\()_chroma_mc4_neon, export=1 | |||
| push {r4-r7, lr} | |||
| ldrd r4, [sp, #20] | |||
| ldrd r4, r5, [sp, #20] | |||
| .ifc \type,avg | |||
| mov lr, r0 | |||
| .endif | |||
| @@ -886,7 +886,7 @@ T mov sp, r0 | |||
| mov r12, #8 | |||
| vpush {d8-d15} | |||
| bl put_h264_qpel8_h_lowpass_neon | |||
| ldrd r0, [r11], #8 | |||
| ldrd r0, r1, [r11], #8 | |||
| mov r3, r2 | |||
| add r12, sp, #64 | |||
| sub r1, r1, r2, lsl #1 | |||
| @@ -913,7 +913,7 @@ T mov sp, r0 | |||
| vpush {d8-d15} | |||
| bl put_h264_qpel8_h_lowpass_neon | |||
| mov r4, r0 | |||
| ldrd r0, [r11], #8 | |||
| ldrd r0, r1, [r11], #8 | |||
| sub r1, r1, r2, lsl #1 | |||
| sub r1, r1, #2 | |||
| mov r3, r2 | |||
| @@ -958,7 +958,7 @@ T mov sp, r0 | |||
| vpush {d8-d15} | |||
| bl put_h264_qpel8_v_lowpass_neon | |||
| mov r4, r0 | |||
| ldrd r0, [r11], #8 | |||
| ldrd r0, r1, [r11], #8 | |||
| sub r1, r1, r3, lsl #1 | |||
| sub r1, r1, #2 | |||
| sub r2, r4, #64 | |||
| @@ -1071,7 +1071,7 @@ T mov sp, r0 | |||
| mov r3, #16 | |||
| vpush {d8-d15} | |||
| bl put_h264_qpel16_h_lowpass_neon | |||
| ldrd r0, [r11], #8 | |||
| ldrd r0, r1, [r11], #8 | |||
| mov r3, r2 | |||
| add r12, sp, #64 | |||
| sub r1, r1, r2, lsl #1 | |||
| @@ -1096,7 +1096,7 @@ T mov sp, r0 | |||
| vpush {d8-d15} | |||
| bl put_h264_qpel16_h_lowpass_neon_packed | |||
| mov r4, r0 | |||
| ldrd r0, [r11], #8 | |||
| ldrd r0, r1, [r11], #8 | |||
| sub r1, r1, r2, lsl #1 | |||
| sub r1, r1, #2 | |||
| mov r3, r2 | |||
| @@ -1139,7 +1139,7 @@ T mov sp, r0 | |||
| vpush {d8-d15} | |||
| bl put_h264_qpel16_v_lowpass_neon_packed | |||
| mov r4, r0 | |||
| ldrd r0, [r11], #8 | |||
| ldrd r0, r1, [r11], #8 | |||
| sub r1, r1, r3, lsl #1 | |||
| sub r1, r1, #2 | |||
| mov r2, r3 | |||
| @@ -61,9 +61,9 @@ function ff_dct_unquantize_h263_armv5te, export=1 | |||
| mov ip, #0 | |||
| subs r3, r3, #2 | |||
| ble 2f | |||
| ldrd r4, [r0, #0] | |||
| ldrd r4, r5, [r0, #0] | |||
| 1: | |||
| ldrd r6, [r0, #8] | |||
| ldrd r6, r7, [r0, #8] | |||
| dequant_t r9, r4, r1, r2, r9 | |||
| dequant_t lr, r5, r1, r2, lr | |||
| @@ -87,7 +87,7 @@ function ff_dct_unquantize_h263_armv5te, export=1 | |||
| subs r3, r3, #8 | |||
| it gt | |||
| ldrdgt r4, [r0, #0] /* load data early to avoid load/use pipeline stall */ | |||
| ldrdgt r4, r5, [r0, #0] /* load data early to avoid load/use pipeline stall */ | |||
| bgt 1b | |||
| adds r3, r3, #2 | |||
| @@ -46,8 +46,8 @@ w57: .long W57 | |||
| function idct_row_armv5te | |||
| str lr, [sp, #-4]! | |||
| ldrd v1, [a1, #8] | |||
| ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */ | |||
| ldrd v1, v2, [a1, #8] | |||
| ldrd a3, a4, [a1] /* a3 = row[1:0], a4 = row[3:2] */ | |||
| orrs v1, v1, v2 | |||
| itt eq | |||
| cmpeq v1, a4 | |||
| @@ -78,7 +78,7 @@ function idct_row_armv5te | |||
| smultt fp, lr, a3 | |||
| sub v7, v7, a2 | |||
| smulbt a2, lr, a4 | |||
| ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */ | |||
| ldrd a3, a4, [a1, #8] /* a3=row[5:4] a4=row[7:6] */ | |||
| sub fp, fp, a2 | |||
| orrs a2, a3, a4 | |||
| @@ -121,7 +121,7 @@ function idct_row_armv5te | |||
| add a2, v4, fp | |||
| mov a2, a2, lsr #11 | |||
| add a4, a4, a2, lsl #16 | |||
| strd a3, [a1] | |||
| strd a3, a4, [a1] | |||
| sub a2, v4, fp | |||
| mov a3, a2, lsr #11 | |||
| @@ -135,7 +135,7 @@ function idct_row_armv5te | |||
| sub a2, v1, v5 | |||
| mov a2, a2, lsr #11 | |||
| add a4, a4, a2, lsl #16 | |||
| strd a3, [a1, #8] | |||
| strd a3, a4, [a1, #8] | |||
| ldr pc, [sp], #4 | |||
| @@ -144,8 +144,8 @@ row_dc_only: | |||
| bic a3, a3, #0xe000 | |||
| mov a3, a3, lsl #3 | |||
| mov a4, a3 | |||
| strd a3, [a1] | |||
| strd a3, [a1, #8] | |||
| strd a3, a4, [a1] | |||
| strd a3, a4, [a1, #8] | |||
| ldr pc, [sp], #4 | |||
| endfunc | |||
| @@ -159,8 +159,8 @@ function idct_col4_neon | |||
| vmull.s16 q15, d30, w4 /* q15 = W4*(col[0]+(1<<COL_SHIFT-1)/W4)*/ | |||
| vld1.64 {d8}, [r2,:64], ip /* d5 = col[3] */ | |||
| ldrd r4, [r2] | |||
| ldrd r6, [r2, #16] | |||
| ldrd r4, r5, [r2] | |||
| ldrd r6, r7, [r2, #16] | |||
| orrs r4, r4, r5 | |||
| idct_col4_top | |||
| @@ -176,7 +176,7 @@ function idct_col4_neon | |||
| vadd.i32 q14, q14, q7 | |||
| 1: orrs r6, r6, r7 | |||
| ldrd r4, [r2, #16] | |||
| ldrd r4, r5, [r2, #16] | |||
| it eq | |||
| addeq r2, r2, #16 | |||
| beq 2f | |||
| @@ -188,7 +188,7 @@ function idct_col4_neon | |||
| vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */ | |||
| 2: orrs r4, r4, r5 | |||
| ldrd r4, [r2, #16] | |||
| ldrd r4, r5, [r2, #16] | |||
| it eq | |||
| addeq r2, r2, #16 | |||
| beq 3f | |||