Originally committed as revision 11829 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.5)
| @@ -206,13 +206,11 @@ DECLARE_ALIGNED_16(const double, ff_pd_2[2]) = { 2.0, 2.0 }; | |||||
/* Name aliases: each *_mmx2 and *_3dnow helper listed below is mapped onto the
 * plain *_mmx function of the same name.
 * NOTE(review): the rows that carry only one column (the avg_pixels4 aliases)
 * appear to be the lines touched by this revision -- confirm against the raw diff. */
| #define put_pixels16_mmx2 put_pixels16_mmx | #define put_pixels16_mmx2 put_pixels16_mmx | ||||
| #define put_pixels8_mmx2 put_pixels8_mmx | #define put_pixels8_mmx2 put_pixels8_mmx | ||||
| #define put_pixels4_mmx2 put_pixels4_mmx | #define put_pixels4_mmx2 put_pixels4_mmx | ||||
| #define avg_pixels4_mmx2 avg_pixels4_mmx | |||||
| #define put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx | #define put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx | ||||
| #define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx | #define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx | ||||
| #define put_pixels16_3dnow put_pixels16_mmx | #define put_pixels16_3dnow put_pixels16_mmx | ||||
| #define put_pixels8_3dnow put_pixels8_mmx | #define put_pixels8_3dnow put_pixels8_mmx | ||||
| #define put_pixels4_3dnow put_pixels4_mmx | #define put_pixels4_3dnow put_pixels4_mmx | ||||
| #define avg_pixels4_3dnow avg_pixels4_mmx | |||||
| #define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx | #define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx | ||||
| #define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx | #define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx | ||||
| @@ -795,6 +795,31 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line | |||||
| :"%"REG_a, "memory"); | :"%"REG_a, "memory"); | ||||
| } | } | ||||
| static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| { | |||||
| do { | |||||
| asm volatile( | |||||
| "movd (%1), %%mm0 \n\t" | |||||
| "movd (%1, %2), %%mm1 \n\t" | |||||
| "movd (%1, %2, 2), %%mm2 \n\t" | |||||
| "movd (%1, %3), %%mm3 \n\t" | |||||
| PAVGB" (%0), %%mm0 \n\t" | |||||
| PAVGB" (%0, %2), %%mm1 \n\t" | |||||
| PAVGB" (%0, %2, 2), %%mm2 \n\t" | |||||
| PAVGB" (%0, %3), %%mm3 \n\t" | |||||
| "movd %%mm0, (%1) \n\t" | |||||
| "movd %%mm1, (%1, %2) \n\t" | |||||
| "movd %%mm2, (%1, %2, 2) \n\t" | |||||
| "movd %%mm3, (%1, %3) \n\t" | |||||
| ::"S"(pixels), "D"(block), | |||||
| "r" ((long)line_size), "r"(3L*line_size) | |||||
| :"memory"); | |||||
| block += 4*line_size; | |||||
| pixels += 4*line_size; | |||||
| h -= 4; | |||||
| } while(h > 0); | |||||
| } | |||||
| //FIXME the following could be optimized too ... | //FIXME the following could be optimized too ... | ||||
| static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ | static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ | ||||
| DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h); | DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h); | ||||