|
|
|
@@ -245,14 +245,14 @@ void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, |
|
|
|
pix = pixels; |
|
|
|
/* unrolled loop */ |
|
|
|
__asm__ volatile ( |
|
|
|
"movq %3, %%mm0 \n\t" |
|
|
|
"movq 8%3, %%mm1 \n\t" |
|
|
|
"movq 16%3, %%mm2 \n\t" |
|
|
|
"movq 24%3, %%mm3 \n\t" |
|
|
|
"movq 32%3, %%mm4 \n\t" |
|
|
|
"movq 40%3, %%mm5 \n\t" |
|
|
|
"movq 48%3, %%mm6 \n\t" |
|
|
|
"movq 56%3, %%mm7 \n\t" |
|
|
|
"movq (%3), %%mm0 \n\t" |
|
|
|
"movq 8(%3), %%mm1 \n\t" |
|
|
|
"movq 16(%3), %%mm2 \n\t" |
|
|
|
"movq 24(%3), %%mm3 \n\t" |
|
|
|
"movq 32(%3), %%mm4 \n\t" |
|
|
|
"movq 40(%3), %%mm5 \n\t" |
|
|
|
"movq 48(%3), %%mm6 \n\t" |
|
|
|
"movq 56(%3), %%mm7 \n\t" |
|
|
|
"packuswb %%mm1, %%mm0 \n\t" |
|
|
|
"packuswb %%mm3, %%mm2 \n\t" |
|
|
|
"packuswb %%mm5, %%mm4 \n\t" |
|
|
|
@@ -262,7 +262,7 @@ void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, |
|
|
|
"movq %%mm4, (%0, %1, 2) \n\t" |
|
|
|
"movq %%mm6, (%0, %2) \n\t" |
|
|
|
:: "r"(pix), "r"((x86_reg)line_size), "r"((x86_reg)line_size * 3), |
|
|
|
"m"(*p) |
|
|
|
"r"(p) |
|
|
|
: "memory"); |
|
|
|
pix += line_size * 4; |
|
|
|
p += 32; |
|
|
|
|