Originally committed as revision 16116 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.5)
| @@ -23,24 +23,6 @@ | |||
| #warning Everything in this header is deprecated, use plain __asm__()! New code using this header will be rejected. | |||
| /* | |||
| * The type of a value that fits in an MMX register (note that long | |||
| * long constant values MUST be suffixed by LL and unsigned long long | |||
| * values by ULL, lest they be truncated by the compiler). | |||
| * Being a union, every member is an alternative view of the same storage. | |||
| */ | |||
| typedef union { | |||
| long long q; /* Quadword (64-bit) value */ | |||
| unsigned long long uq; /* Unsigned Quadword (64-bit) value */ | |||
| int d[2]; /* 2 Doubleword (32-bit) values */ | |||
| unsigned int ud[2]; /* 2 Unsigned Doubleword (32-bit) values */ | |||
| short w[4]; /* 4 Word (16-bit) values */ | |||
| unsigned short uw[4]; /* 4 Unsigned Word (16-bit) values */ | |||
| char b[8]; /* 8 Byte (8-bit) values */ | |||
| unsigned char ub[8]; /* 8 Unsigned Byte (8-bit) values */ | |||
| float s[2]; /* 2 Single-precision (32-bit) values */ | |||
| } mmx_t; /* Intended to sit on an 8-byte (64-bit) boundary. NOTE(review): no alignment attribute is visible on this declaration -- confirm how alignment is guaranteed. */ | |||
| #define mmx_i2r(op,imm,reg) \ | |||
| __asm__ volatile (#op " %0, %%" #reg \ | |||
| @@ -36,6 +36,7 @@ | |||
| #ifdef HAVE_MMX | |||
| #include "i386/mmx.h" | |||
| #include "i386/dsputil_mmx.h" | |||
| #endif | |||
| #define xglue(x, y) x ## y | |||
| @@ -2733,13 +2734,8 @@ static void deinterlace_line(uint8_t *dst, | |||
| #else | |||
| { | |||
| mmx_t rounder; | |||
| rounder.uw[0]=4; | |||
| rounder.uw[1]=4; | |||
| rounder.uw[2]=4; | |||
| rounder.uw[3]=4; | |||
| pxor_r2r(mm7,mm7); | |||
| movq_m2r(rounder,mm6); | |||
| movq_m2r(ff_pw_4,mm6); | |||
| } | |||
| for (;size > 3; size-=4) { | |||
| DEINT_LINE_LUM | |||
| @@ -2776,13 +2772,8 @@ static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t * | |||
| #else | |||
| { | |||
| mmx_t rounder; | |||
| rounder.uw[0]=4; | |||
| rounder.uw[1]=4; | |||
| rounder.uw[2]=4; | |||
| rounder.uw[3]=4; | |||
| pxor_r2r(mm7,mm7); | |||
| movq_m2r(rounder,mm6); | |||
| movq_m2r(ff_pw_4,mm6); | |||
| } | |||
| for (;size > 3; size-=4) { | |||
| DEINT_INPLACE_LINE_LUM | |||
| @@ -183,7 +183,7 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, | |||
| int src_pos, phase; | |||
| const uint8_t *s; | |||
| int16_t *filter; | |||
| mmx_t tmp; | |||
| uint64_t tmp; | |||
| src_pos = src_start; | |||
| pxor_r2r(mm7, mm7); | |||
| @@ -200,13 +200,13 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, | |||
| packuswb_r2r(mm7, mm3); | |||
| packuswb_r2r(mm7, mm2); | |||
| movq_r2m(mm0, tmp); | |||
| dst[0] = tmp.ub[0]; | |||
| dst[0] = tmp & 0xFF; | |||
| movq_r2m(mm1, tmp); | |||
| dst[1] = tmp.ub[0]; | |||
| dst[1] = tmp & 0xFF; | |||
| movq_r2m(mm2, tmp); | |||
| dst[2] = tmp.ub[0]; | |||
| dst[2] = tmp & 0xFF; | |||
| movq_r2m(mm3, tmp); | |||
| dst[3] = tmp.ub[0]; | |||
| dst[3] = tmp & 0xFF; | |||
| dst += 4; | |||
| dst_width -= 4; | |||
| } | |||
| @@ -214,7 +214,7 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, | |||
| FILTER4(mm0); | |||
| packuswb_r2r(mm7, mm0); | |||
| movq_r2m(mm0, tmp); | |||
| dst[0] = tmp.ub[0]; | |||
| dst[0] = tmp & 0xFF; | |||
| dst++; | |||
| dst_width--; | |||
| } | |||
| @@ -224,17 +224,14 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, | |||
| static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src, | |||
| int wrap, int16_t *filter) | |||
| { | |||
| int sum, i, v; | |||
| int sum, i; | |||
| const uint8_t *s; | |||
| mmx_t tmp; | |||
| mmx_t coefs[4]; | |||
| uint64_t tmp; | |||
| uint64_t coefs[4]; | |||
| for(i=0;i<4;i++) { | |||
| v = filter[i]; | |||
| coefs[i].uw[0] = v; | |||
| coefs[i].uw[1] = v; | |||
| coefs[i].uw[2] = v; | |||
| coefs[i].uw[3] = v; | |||
| tmp = filter[i]; | |||
| coefs[i] = (tmp<<48) + (tmp<<32) + (tmp<<16) + tmp; | |||
| } | |||
| pxor_r2r(mm7, mm7); | |||
| @@ -262,7 +259,7 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src, | |||
| packuswb_r2r(mm7, mm0); | |||
| movq_r2m(mm0, tmp); | |||
| *(uint32_t *)dst = tmp.ud[0]; | |||
| *(uint32_t *)dst = tmp & 0xFFFFFFFF; | |||
| dst += 4; | |||
| s += 4; | |||
| dst_width -= 4; | |||