Originally committed as revision 11868 to svn://svn.ffmpeg.org/ffmpeg/trunk
@@ -98,7 +98,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1
     }

     /* general case, bilinear */
-    rnd_reg = rnd ? ff_pw_32 : &ff_pw_28;
+    rnd_reg = rnd ? &ff_pw_32.a : &ff_pw_28;
     asm volatile("movd %2, %%mm4\n\t"
                  "movd %3, %%mm6\n\t"
                  "punpcklwd %%mm4, %%mm4\n\t"
@@ -250,7 +250,7 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1
         "sub $2, %2             \n\t"
         "jnz 1b                 \n\t"
         : "+r"(dst), "+r"(src), "+r"(h)
-        : "r"((long)stride), "m"(*ff_pw_32), "m"(x), "m"(y)
+        : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y)
     );
 }
@@ -301,7 +301,7 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1
         "sub $1, %2\n\t"
         "jnz 1b\n\t"
         : "+r" (dst), "+r"(src), "+r"(h)
-        : "m" (*ff_pw_32), "r"((long)stride)
+        : "m" (ff_pw_32), "r"((long)stride)
         : "%esi");
 }
@@ -54,7 +54,7 @@ DECLARE_ALIGNED_8 (const uint64_t, ff_pw_8 ) = 0x0008000800080008ULL;
 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_16 ) = 0x0010001000100010ULL;
 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
-DECLARE_ALIGNED_16(const uint64_t, ff_pw_32[2]) = {0x0020002000200020ULL, 0x0020002000200020ULL};
+DECLARE_ALIGNED_16(const xmm_t,    ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_64 ) = 0x0040004000400040ULL;
 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
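Both the old and the new declaration are 16-byte aligned; only the C type changes. The 16-byte alignment is what lets SSE2 code load the constant with aligned instructions such as movdqa. For reference, a plausible GCC-style expansion of the alignment macros (an assumption for illustration; FFmpeg's actual definitions live elsewhere and may differ by version):

    #include <stdint.h>

    /* assumed GCC-style expansions, not FFmpeg's real macro bodies */
    #define DECLARE_ALIGNED_8(t, v)  t v __attribute__((aligned(8)))
    #define DECLARE_ALIGNED_16(t, v) t v __attribute__((aligned(16)))

    typedef struct { uint64_t a, b; } xmm_t;

    /* one 16-byte, 16-byte-aligned constant: usable whole by SSE2 code,
       or via its first quadword (.a) by MMX code */
    DECLARE_ALIGNED_16(const xmm_t, ff_pw_32) =
        {0x0020002000200020ULL, 0x0020002000200020ULL};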
@@ -24,6 +24,8 @@
 #include <stdint.h>

+typedef struct { uint64_t a, b; } xmm_t;
+
 extern const uint64_t ff_bone;
 extern const uint64_t ff_wtwo;
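The point of the new xmm_t type: a struct is a single lvalue, so handing it to an "m" constraint describes the whole 16-byte object to the compiler, whereas *ff_pw_32 on the old array named only its first uint64_t element. A hedged, x86-only sketch of the two access patterns (guarded so it compiles to nothing elsewhere; register choices are illustrative):

    #include <stdint.h>

    typedef struct { uint64_t a, b; } xmm_t;

    static const xmm_t pw_32 __attribute__((aligned(16))) =
        {0x0020002000200020ULL, 0x0020002000200020ULL};

    void load_rounders(void)
    {
    #if defined(__SSE2__)
        /* SSE2: the struct itself is the 16-byte memory operand */
        __asm__ volatile("movdqa %0, %%xmm7" :: "m"(pw_32) : "%xmm7");
    #endif
    #if defined(__MMX__)
        /* MMX: only the low quadword is needed; the first member provides it */
        __asm__ volatile("movq %0, %%mm7" :: "m"(pw_32.a) : "%mm7");
    #endif
    }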
@@ -36,7 +38,7 @@ extern const uint64_t ff_pw_8;
 extern const uint64_t ff_pw_15;
 extern const uint64_t ff_pw_16;
 extern const uint64_t ff_pw_20;
-extern const uint64_t ff_pw_32[2];
+extern const xmm_t    ff_pw_32;
 extern const uint64_t ff_pw_42;
 extern const uint64_t ff_pw_64;
 extern const uint64_t ff_pw_96;
@@ -75,7 +75,7 @@ static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
         IDCT4_1D( %%mm4, %%mm2, %%mm3, %%mm0, %%mm1 )
         "pxor %%mm7, %%mm7    \n\t"
-        :: "m"(*ff_pw_32));
+        :: "m"(ff_pw_32));

     asm volatile(
         STORE_DIFF_4P( %%mm0, %%mm1, %%mm7)
@@ -294,7 +294,7 @@ static void ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
         STORE_DIFF_8P(%%xmm0, (%0,%2,2), %%xmm6, %%xmm7)
         STORE_DIFF_8P(%%xmm1, (%0,%3),   %%xmm6, %%xmm7)
         :"+r"(dst)
-        :"r"(block), "r"((long)stride), "r"(3L*stride), "m"(*ff_pw_32)
+        :"r"(block), "r"((long)stride), "r"(3L*stride), "m"(ff_pw_32)
     );
 }
@@ -926,7 +926,7 @@ static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, in
         "decl %2                    \n\t"\
         " jnz 1b                    \n\t"\
         : "+a"(tmp), "+c"(dst), "+m"(h)\
-        : "S"((long)dstStride), "m"(*ff_pw_32)\
+        : "S"((long)dstStride), "m"(ff_pw_32)\
         : "memory"\
     );\
 }\
@@ -1200,7 +1200,7 @@ static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst
         "decl %2                    \n\t"\
         " jnz 1b                    \n\t"\
         : "+a"(tmp), "+c"(dst), "+m"(h)\
-        : "S"((long)dstStride), "m"(*ff_pw_32)\
+        : "S"((long)dstStride), "m"(ff_pw_32)\
         : "memory"\
     );\
     tmp += 8 - size*24;\
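Summary of the mechanical part of the change: every "m"(*ff_pw_32) becomes "m"(ff_pw_32). The struct begins at the same address as its first member, so MMX code reads the same 8 bytes as before; nothing changes at run time. A small self-contained check of that layout assumption (stand-in name, any C compiler with GCC attribute support):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint64_t a, b; } xmm_t;

    static const xmm_t pw_32 __attribute__((aligned(16))) =
        {0x0020002000200020ULL, 0x0020002000200020ULL};

    int main(void)
    {
        /* the struct and its first member share an address, so asm that used
           to read 8 bytes at *ff_pw_32 reads identical data via ff_pw_32.a */
        assert((const void *)&pw_32 == (const void *)&pw_32.a);
        /* and the object is 16-byte aligned, as SSE2 aligned loads require */
        assert(((uintptr_t)&pw_32 & 15) == 0);
        printf("ff_pw_32 layout ok: low quadword = %016llx\n",
               (unsigned long long)pw_32.a);
        return 0;
    }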