This macro has unconditionally used .p2align for a long time and serves no useful purpose. (tag: n0.8)
| @@ -3245,7 +3245,6 @@ cat > $TMPH <<EOF | |||
| #define CC_TYPE "$cc_type" | |||
| #define CC_VERSION $cc_version | |||
| #define restrict $_restrict | |||
| #define ASMALIGN(ZEROBITS) ".p2align " #ZEROBITS "\\n\\t" | |||
| #define EXTERN_PREFIX "${extern_prefix}" | |||
| #define EXTERN_ASM ${extern_prefix} | |||
| #define SLIBSUF "$SLIBSUF" | |||
| @@ -81,7 +81,7 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE ) = {0xFEFEFEFEFEFEFEFEULL, 0xFEFEF | |||
| DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 }; | |||
| DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; | |||
| #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) | |||
| #define JUMPALIGN() __asm__ volatile (".p2align 3"::) | |||
| #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) | |||
| #define MOVQ_BFE(regd) \ | |||
| @@ -368,7 +368,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size | |||
| { | |||
| __asm__ volatile( | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| ASMALIGN(3) | |||
| ".p2align 3 \n\t" | |||
| "1: \n\t" | |||
| "movd (%1), %%mm0 \n\t" | |||
| "movd (%1, %3), %%mm1 \n\t" | |||
| @@ -394,7 +394,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size | |||
| { | |||
| __asm__ volatile( | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| ASMALIGN(3) | |||
| ".p2align 3 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| @@ -420,7 +420,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz | |||
| { | |||
| __asm__ volatile( | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| ASMALIGN(3) | |||
| ".p2align 3 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 8(%1), %%mm4 \n\t" | |||
| @@ -838,7 +838,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| PAVGB" 1(%1), %%mm0 \n\t" | |||
| ASMALIGN(3) | |||
| ".p2align 3 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| @@ -37,7 +37,7 @@ static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[ | |||
| "movd %4, %%mm5 \n\t" | |||
| "punpcklwd %%mm5, %%mm5 \n\t" | |||
| "punpcklwd %%mm5, %%mm5 \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %0), %%mm0 \n\t" | |||
| "movq 8(%1, %0), %%mm1 \n\t" | |||
| @@ -77,7 +77,7 @@ static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale) | |||
| "movd %3, %%mm5 \n\t" | |||
| "punpcklwd %%mm5, %%mm5 \n\t" | |||
| "punpcklwd %%mm5, %%mm5 \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %0), %%mm0 \n\t" | |||
| "movq 8(%1, %0), %%mm1 \n\t" | |||
| @@ -30,7 +30,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line | |||
| MOVQ_BFE(mm6); | |||
| __asm__ volatile( | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| ASMALIGN(3) | |||
| ".p2align 3 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm1 \n\t" | |||
| @@ -71,7 +71,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t | |||
| "movq %%mm4, (%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "decl %0 \n\t" | |||
| ASMALIGN(3) | |||
| ".p2align 3 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%2), %%mm1 \n\t" | |||
| @@ -112,7 +112,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin | |||
| MOVQ_BFE(mm6); | |||
| __asm__ volatile( | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| ASMALIGN(3) | |||
| ".p2align 3 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm1 \n\t" | |||
| @@ -170,7 +170,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t | |||
| "movq %%mm5, 8(%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "decl %0 \n\t" | |||
| ASMALIGN(3) | |||
| ".p2align 3 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%2), %%mm1 \n\t" | |||
| @@ -208,7 +208,7 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line | |||
| __asm__ volatile( | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| ASMALIGN(3) | |||
| ".p2align 3 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"),%%mm2 \n\t" | |||
| @@ -248,7 +248,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin | |||
| "paddusw %%mm1, %%mm5 \n\t" | |||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||
| "add %3, %1 \n\t" | |||
| ASMALIGN(3) | |||
| ".p2align 3 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||
| @@ -460,7 +460,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line | |||
| __asm__ volatile( | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| ASMALIGN(3) | |||
| ".p2align 3 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" | |||
| @@ -511,7 +511,7 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin | |||
| "paddusw %%mm1, %%mm5 \n\t" | |||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||
| "add %3, %1 \n\t" | |||
| ASMALIGN(3) | |||
| ".p2align 3 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||
| @@ -35,7 +35,7 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) | |||
| __asm__ volatile( | |||
| "mov $-128, %%"REG_a" \n\t" | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0), %%mm0 \n\t" | |||
| "movq (%0, %2), %%mm2 \n\t" | |||
| @@ -97,7 +97,7 @@ static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint | |||
| __asm__ volatile( | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "mov $-128, %%"REG_a" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0), %%mm0 \n\t" | |||
| "movq (%1), %%mm2 \n\t" | |||
| @@ -356,7 +356,7 @@ inline void ff_idct_xvid_sse2(short *block) | |||
| TEST_TWO_ROWS("5*16(%0)", "6*16(%0)", "%%eax", "%%edx", CLEAR_ODD(ROW5), CLEAR_EVEN(ROW6)) | |||
| TEST_ONE_ROW("7*16(%0)", "%%esi", CLEAR_ODD(ROW7)) | |||
| iLLM_HEAD | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| JNZ("%%ecx", "2f") | |||
| JNZ("%%eax", "3f") | |||
| JNZ("%%edx", "4f") | |||
| @@ -38,7 +38,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||
| { | |||
| x86_reg len= -(stride*h); | |||
| __asm__ volatile( | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||
| @@ -73,7 +73,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||
| static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||
| { | |||
| __asm__ volatile( | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| @@ -95,7 +95,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) | |||
| int ret; | |||
| __asm__ volatile( | |||
| "pxor %%xmm2, %%xmm2 \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movdqu (%1), %%xmm0 \n\t" | |||
| "movdqu (%1, %4), %%xmm1 \n\t" | |||
| @@ -119,7 +119,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) | |||
| static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||
| { | |||
| __asm__ volatile( | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| @@ -143,7 +143,7 @@ static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h | |||
| __asm__ volatile( | |||
| "movq (%1), %%mm0 \n\t" | |||
| "add %3, %1 \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm1 \n\t" | |||
| "movq (%1, %3), %%mm2 \n\t" | |||
| @@ -170,7 +170,7 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||
| "movq (%1), %%mm0 \n\t" | |||
| "pavgb 1(%1), %%mm0 \n\t" | |||
| "add %3, %1 \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm1 \n\t" | |||
| "movq (%1,%3), %%mm2 \n\t" | |||
| @@ -197,7 +197,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int | |||
| { | |||
| x86_reg len= -(stride*h); | |||
| __asm__ volatile( | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm1 \n\t" | |||
| @@ -245,7 +245,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||
| "paddw %%mm2, %%mm0 \n\t" | |||
| "paddw %%mm3, %%mm1 \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||
| "movq 1(%2, %%"REG_a"), %%mm4 \n\t" | |||
| @@ -66,7 +66,7 @@ __asm__ volatile( | |||
| "packssdw %%mm5, %%mm5 \n\t" | |||
| "psubw %%mm5, %%mm7 \n\t" | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %3), %%mm0 \n\t" | |||
| "movq 8(%0, %3), %%mm1 \n\t" | |||
| @@ -129,7 +129,7 @@ __asm__ volatile( | |||
| "packssdw %%mm5, %%mm5 \n\t" | |||
| "psubw %%mm5, %%mm7 \n\t" | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %3), %%mm0 \n\t" | |||
| "movq 8(%0, %3), %%mm1 \n\t" | |||
| @@ -222,7 +222,7 @@ __asm__ volatile( | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "mov %3, %%"REG_a" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||
| @@ -285,7 +285,7 @@ __asm__ volatile( | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "mov %3, %%"REG_a" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||
| @@ -357,7 +357,7 @@ __asm__ volatile( | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "mov %3, %%"REG_a" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||
| @@ -418,7 +418,7 @@ __asm__ volatile( | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "mov %3, %%"REG_a" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||
| @@ -158,7 +158,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | |||
| "pxor "MM"6, "MM"6 \n\t" | |||
| "psubw (%3), "MM"6 \n\t" // -bias[0] | |||
| "mov $-128, %%"REG_a" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| MOVQ" (%1, %%"REG_a"), "MM"0 \n\t" // block[i] | |||
| SAVE_SIGN(MM"1", MM"0") // ABS(block[i]) | |||
| @@ -190,7 +190,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | |||
| "pxor "MM"7, "MM"7 \n\t" // 0 | |||
| "pxor "MM"4, "MM"4 \n\t" // 0 | |||
| "mov $-128, %%"REG_a" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| MOVQ" (%1, %%"REG_a"), "MM"0 \n\t" // block[i] | |||
| SAVE_SIGN(MM"1", MM"0") // ABS(block[i]) | |||
| @@ -789,7 +789,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20) | |||
| IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20) | |||
| "jmp 9f \n\t" | |||
| "#" ASMALIGN(4) \ | |||
| "# .p2align 4 \n\t"\ | |||
| "4: \n\t" | |||
| Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f) | |||
| Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f) | |||
| @@ -864,7 +864,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20) | |||
| IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20) | |||
| "jmp 9f \n\t" | |||
| "#" ASMALIGN(4) \ | |||
| "# .p2align 4 \n\t"\ | |||
| "6: \n\t" | |||
| Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f) | |||
| @@ -930,7 +930,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20) | |||
| IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20) | |||
| "jmp 9f \n\t" | |||
| "#" ASMALIGN(4) \ | |||
| "# .p2align 4 \n\t"\ | |||
| "2: \n\t" | |||
| Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f) | |||
| @@ -1007,7 +1007,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20) | |||
| IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20) | |||
| "jmp 9f \n\t" | |||
| "#" ASMALIGN(4) \ | |||
| "# .p2align 4 \n\t"\ | |||
| "3: \n\t" | |||
| #undef IDCT | |||
| #define IDCT(src0, src4, src1, src5, dst, shift) \ | |||
| @@ -1071,7 +1071,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20) | |||
| IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20) | |||
| "jmp 9f \n\t" | |||
| "#" ASMALIGN(4) \ | |||
| "# .p2align 4 \n\t"\ | |||
| "5: \n\t" | |||
| #undef IDCT | |||
| #define IDCT(src0, src4, src1, src5, dst, shift) \ | |||
| @@ -1136,7 +1136,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20) | |||
| "jmp 9f \n\t" | |||
| "#" ASMALIGN(4) \ | |||
| "# .p2align 4 \n\t"\ | |||
| "1: \n\t" | |||
| #undef IDCT | |||
| #define IDCT(src0, src4, src1, src5, dst, shift) \ | |||
| @@ -1210,7 +1210,7 @@ IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20) | |||
| "jmp 9f \n\t" | |||
| "#" ASMALIGN(4) | |||
| "# .p2align 4 \n\t" | |||
| "7: \n\t" | |||
| #undef IDCT | |||
| #define IDCT(src0, src4, src1, src5, dst, shift) \ | |||
| @@ -275,7 +275,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \ | |||
| LOAD_ROUNDER_MMX("%5") \ | |||
| "movq "MANGLE(ff_pw_53)", %%mm5\n\t" \ | |||
| "movq "MANGLE(ff_pw_18)", %%mm6\n\t" \ | |||
| ASMALIGN(3) \ | |||
| ".p2align 3 \n\t" \ | |||
| "1: \n\t" \ | |||
| MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4) \ | |||
| NORMALIZE_MMX("%6") \ | |||
| @@ -331,7 +331,7 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \ | |||
| LOAD_ROUNDER_MMX("%4") \ | |||
| "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \ | |||
| "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \ | |||
| ASMALIGN(3) \ | |||
| ".p2align 3 \n\t" \ | |||
| "1: \n\t" \ | |||
| MSPEL_FILTER13_CORE(DONT_UNPACK, "movq 2", A1, A2, A3, A4) \ | |||
| NORMALIZE_MMX("$7") \ | |||
| @@ -369,7 +369,7 @@ OPNAME ## vc1_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \ | |||
| LOAD_ROUNDER_MMX("%6") \ | |||
| "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \ | |||
| "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \ | |||
| ASMALIGN(3) \ | |||
| ".p2align 3 \n\t" \ | |||
| "1: \n\t" \ | |||
| MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4) \ | |||
| NORMALIZE_MMX("$6") \ | |||
| @@ -329,7 +329,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_ | |||
| "movq %4, %%mm6 \n\t" | |||
| "movq %5, %%mm7 \n\t" | |||
| "jmp 2f \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movd (%1), %%mm0 \n\t" | |||
| @@ -484,7 +484,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_ | |||
| "movq %4, %%mm6 \n\t" | |||
| "movq %5, %%mm7 \n\t" | |||
| "jmp 2f \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| PREFETCH" 32(%1) \n\t" | |||
| "movd (%1), %%mm0 \n\t" | |||
| @@ -1239,7 +1239,7 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, | |||
| "pxor %4, %%mm7 \n\t" | |||
| "movq %%mm7, %%mm6 \n\t" | |||
| "pxor %5, %%mm7 \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| PREFETCH" 32(%1, %0) \n\t" | |||
| "movq (%1, %0), %%mm0 \n\t" | |||
| @@ -1300,7 +1300,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s | |||
| "movq "MANGLE(mask24r)", %%mm5 \n\t" | |||
| "movq "MANGLE(mask24g)", %%mm6 \n\t" | |||
| "movq "MANGLE(mask24b)", %%mm7 \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| PREFETCH" 32(%1, %%"REG_a") \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG | |||
| @@ -1369,7 +1369,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u | |||
| //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) | |||
| __asm__ volatile( | |||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| PREFETCH" 32(%1, %%"REG_a", 2) \n\t" | |||
| PREFETCH" 32(%2, %%"REG_a") \n\t" | |||
| @@ -1519,7 +1519,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u | |||
| //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) | |||
| __asm__ volatile( | |||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| PREFETCH" 32(%1, %%"REG_a", 2) \n\t" | |||
| PREFETCH" 32(%2, %%"REG_a") \n\t" | |||
| @@ -1648,7 +1648,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | |||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||
| "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | |||
| "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | |||
| @@ -1701,7 +1701,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | |||
| __asm__ volatile( | |||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | |||
| "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | |||
| @@ -1884,7 +1884,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | |||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||
| "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | |||
| "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0) | |||
| @@ -1937,7 +1937,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | |||
| __asm__ volatile( | |||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | |||
| "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | |||
| @@ -2012,7 +2012,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| PREFETCH" 64(%0, %%"REG_d") \n\t" | |||
| "movd (%0, %%"REG_d"), %%mm0 \n\t" | |||
| @@ -2086,7 +2086,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | |||
| "add %%"REG_d", %%"REG_d" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| PREFETCH" 64(%0, %%"REG_d") \n\t" | |||
| PREFETCH" 64(%1, %%"REG_d") \n\t" | |||
| @@ -55,7 +55,7 @@ | |||
| "movq %%mm3, %%mm4 \n\t"\ | |||
| "lea " offset "(%0), %%"REG_d" \n\t"\ | |||
| "mov (%%"REG_d"), %%"REG_S" \n\t"\ | |||
| ASMALIGN(4) /* FIXME Unroll? */\ | |||
| ".p2align 4 \n\t" /* FIXME Unroll? */\ | |||
| "1: \n\t"\ | |||
| "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ | |||
| "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\ | |||
| @@ -93,7 +93,7 @@ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "mov (%%"REG_d"), %%"REG_S" \n\t"\ | |||
| ASMALIGN(4) \ | |||
| ".p2align 4 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\ | |||
| "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\ | |||
| @@ -148,7 +148,7 @@ | |||
| #define YSCALEYUV2YV121 \ | |||
| "mov %2, %%"REG_a" \n\t"\ | |||
| ASMALIGN(4) /* FIXME Unroll? */\ | |||
| ".p2align 4 \n\t" /* FIXME Unroll? */\ | |||
| "1: \n\t"\ | |||
| "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\ | |||
| "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\ | |||
| @@ -164,7 +164,7 @@ | |||
| "pcmpeqw %%mm7, %%mm7 \n\t"\ | |||
| "psrlw $15, %%mm7 \n\t"\ | |||
| "psllw $6, %%mm7 \n\t"\ | |||
| ASMALIGN(4) /* FIXME Unroll? */\ | |||
| ".p2align 4 \n\t" /* FIXME Unroll? */\ | |||
| "1: \n\t"\ | |||
| "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\ | |||
| "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\ | |||
| @@ -187,14 +187,14 @@ | |||
| #define YSCALEYUV2PACKEDX_UV \ | |||
| __asm__ volatile(\ | |||
| "xor %%"REG_a", %%"REG_a" \n\t"\ | |||
| ASMALIGN(4)\ | |||
| ".p2align 4 \n\t"\ | |||
| "nop \n\t"\ | |||
| "1: \n\t"\ | |||
| "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ | |||
| "mov (%%"REG_d"), %%"REG_S" \n\t"\ | |||
| "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ | |||
| "movq %%mm3, %%mm4 \n\t"\ | |||
| ASMALIGN(4)\ | |||
| ".p2align 4 \n\t"\ | |||
| "2: \n\t"\ | |||
| "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ | |||
| "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\ | |||
| @@ -213,7 +213,7 @@ | |||
| "mov (%%"REG_d"), %%"REG_S" \n\t"\ | |||
| "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\ | |||
| "movq "#dst1", "#dst2" \n\t"\ | |||
| ASMALIGN(4)\ | |||
| ".p2align 4 \n\t"\ | |||
| "2: \n\t"\ | |||
| "movq 8(%%"REG_d"), "#coeff" \n\t" /* filterCoeff */\ | |||
| "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" /* Y1srcData */\ | |||
| @@ -241,7 +241,7 @@ | |||
| #define YSCALEYUV2PACKEDX_ACCURATE_UV \ | |||
| __asm__ volatile(\ | |||
| "xor %%"REG_a", %%"REG_a" \n\t"\ | |||
| ASMALIGN(4)\ | |||
| ".p2align 4 \n\t"\ | |||
| "nop \n\t"\ | |||
| "1: \n\t"\ | |||
| "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ | |||
| @@ -250,7 +250,7 @@ | |||
| "pxor %%mm5, %%mm5 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| ASMALIGN(4)\ | |||
| ".p2align 4 \n\t"\ | |||
| "2: \n\t"\ | |||
| "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\ | |||
| "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\ | |||
| @@ -295,7 +295,7 @@ | |||
| "pxor %%mm5, %%mm5 \n\t"\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| ASMALIGN(4)\ | |||
| ".p2align 4 \n\t"\ | |||
| "2: \n\t"\ | |||
| "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\ | |||
| "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\ | |||
| @@ -381,7 +381,7 @@ | |||
| "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\ | |||
| "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\ | |||
| "xor "#index", "#index" \n\t"\ | |||
| ASMALIGN(4)\ | |||
| ".p2align 4 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |||
| "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |||
| @@ -413,7 +413,7 @@ | |||
| #define REAL_YSCALEYUV2RGB_UV(index, c) \ | |||
| "xor "#index", "#index" \n\t"\ | |||
| ASMALIGN(4)\ | |||
| ".p2align 4 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |||
| "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |||
| @@ -488,7 +488,7 @@ | |||
| #define REAL_YSCALEYUV2PACKED1(index, c) \ | |||
| "xor "#index", "#index" \n\t"\ | |||
| ASMALIGN(4)\ | |||
| ".p2align 4 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ | |||
| "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | |||
| @@ -503,7 +503,7 @@ | |||
| #define REAL_YSCALEYUV2RGB1(index, c) \ | |||
| "xor "#index", "#index" \n\t"\ | |||
| ASMALIGN(4)\ | |||
| ".p2align 4 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ | |||
| "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | |||
| @@ -552,7 +552,7 @@ | |||
| #define REAL_YSCALEYUV2PACKED1b(index, c) \ | |||
| "xor "#index", "#index" \n\t"\ | |||
| ASMALIGN(4)\ | |||
| ".p2align 4 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |||
| "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |||
| @@ -571,7 +571,7 @@ | |||
| // do vertical chrominance interpolation | |||
| #define REAL_YSCALEYUV2RGB1b(index, c) \ | |||
| "xor "#index", "#index" \n\t"\ | |||
| ASMALIGN(4)\ | |||
| ".p2align 4 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |||
| "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |||
| @@ -2055,7 +2055,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "push %%"REG_BP" \n\t" // we use 7 regs here ... | |||
| "mov %%"REG_a", %%"REG_BP" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movzwl (%2, %%"REG_BP"), %%eax \n\t" | |||
| "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" | |||
| @@ -2099,7 +2099,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "push %%"REG_BP" \n\t" // we use 7 regs here ... | |||
| "mov %%"REG_a", %%"REG_BP" \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movzwl (%2, %%"REG_BP"), %%eax \n\t" | |||
| "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" | |||
| @@ -2150,7 +2150,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in | |||
| dst-= counter/2; | |||
| __asm__ volatile( | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "mov %2, %%"REG_c" \n\t" | |||
| "movzwl (%%"REG_c", %0), %%eax \n\t" | |||
| @@ -2335,7 +2335,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, | |||
| "xor %%"REG_a", %%"REG_a" \n\t" // i | |||
| "xor %%"REG_d", %%"REG_d" \n\t" // xx | |||
| "xorl %%ecx, %%ecx \n\t" // xalpha | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx] | |||
| "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1] | |||
| @@ -2475,7 +2475,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst, | |||
| "xor %%"REG_a", %%"REG_a" \n\t" // i | |||
| "xor %%"REG_d", %%"REG_d" \n\t" // xx | |||
| "xorl %%ecx, %%ecx \n\t" // xalpha | |||
| ASMALIGN(4) | |||
| ".p2align 4 \n\t" | |||
| "1: \n\t" | |||
| "mov %0, %%"REG_S" \n\t" | |||
| "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx] | |||