|
|
|
@@ -586,6 +586,49 @@ static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, in |
|
|
|
:"%"REG_a, "memory"); |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Write an 8-byte-wide block where each output byte is derived from a source
 * byte and its right-hand neighbour (the bytes at pixels+x and pixels+x+1).
 *
 * Every source value is bit-inverted, the two values are combined with PAVGB,
 * and the result is bit-inverted again before it is stored.
 * NOTE(review): PAVGB and DEF are macros defined outside this view. If PAVGB is
 * the round-up packed byte average ((a+b+1)>>1), the invert/average/invert
 * sequence yields the round-down average (a+b)>>1 - TODO confirm.
 *
 * @param block     destination; 8 bytes are written per row
 * @param pixels    source; 9 bytes are read per row (8 at offset 0, 8 at offset 1)
 * @param line_size byte distance between consecutive rows, used for both
 *                  source and destination
 * @param h         row counter; four rows are handled per loop iteration and
 *                  the body runs once before h is first tested, so at least
 *                  four rows are always written
 */
static void DEF(put_no_rnd_pixels8_x2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
|
|
|
{ |
|
|
|
/* Operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size, %4 = 3*line_size. */
__asm__ volatile ( |
|
|
|
/* Comparing mm6 with itself sets every bit: mm6 is the inversion mask. */
"pcmpeqb %%mm6, %%mm6 \n\t" |
|
|
|
"1: \n\t" |
|
|
|
/* Rows 0 and 1: mm0/mm2 hold the bytes at offset 0, mm1/mm3 those at offset 1. */
"movq (%1), %%mm0 \n\t" |
|
|
|
"movq (%1, %3), %%mm2 \n\t" |
|
|
|
"movq 1(%1), %%mm1 \n\t" |
|
|
|
"movq 1(%1, %3), %%mm3 \n\t" |
|
|
|
/* Invert all four inputs before combining them. */
"pxor %%mm6, %%mm0 \n\t" |
|
|
|
"pxor %%mm6, %%mm2 \n\t" |
|
|
|
"pxor %%mm6, %%mm1 \n\t" |
|
|
|
"pxor %%mm6, %%mm3 \n\t" |
|
|
|
/* AT&T operand order: the second register (mm0, mm2) receives the result. */
PAVGB" %%mm1, %%mm0 \n\t" |
|
|
|
PAVGB" %%mm3, %%mm2 \n\t" |
|
|
|
/* Invert the results back before storing. */
"pxor %%mm6, %%mm0 \n\t" |
|
|
|
"pxor %%mm6, %%mm2 \n\t" |
|
|
|
"movq %%mm0, (%2) \n\t" |
|
|
|
"movq %%mm2, (%2, %3) \n\t" |
|
|
|
/* Rows 2 and 3: same sequence at offsets 2*line_size and 3*line_size (%4). */
"movq (%1, %3,2), %%mm0 \n\t" |
|
|
|
"movq 1(%1, %3,2), %%mm1 \n\t" |
|
|
|
"movq (%1, %4), %%mm2 \n\t" |
|
|
|
"movq 1(%1, %4), %%mm3 \n\t" |
|
|
|
"pxor %%mm6, %%mm0 \n\t" |
|
|
|
"pxor %%mm6, %%mm1 \n\t" |
|
|
|
"pxor %%mm6, %%mm2 \n\t" |
|
|
|
"pxor %%mm6, %%mm3 \n\t" |
|
|
|
PAVGB" %%mm1, %%mm0 \n\t" |
|
|
|
PAVGB" %%mm3, %%mm2 \n\t" |
|
|
|
"pxor %%mm6, %%mm0 \n\t" |
|
|
|
"pxor %%mm6, %%mm2 \n\t" |
|
|
|
"movq %%mm0, (%2, %3,2) \n\t" |
|
|
|
"movq %%mm2, (%2, %4) \n\t" |
|
|
|
/* Advance source and destination by four rows. */
"lea (%1, %3,4), %1 \n\t" |
|
|
|
"lea (%2, %3,4), %2 \n\t" |
|
|
|
/* h -= 4; repeat while the remaining count is still positive. */
"subl $4, %0 \n\t" |
|
|
|
"jg 1b \n\t" |
|
|
|
/* h, pixels and block are read-write: the asm modifies all three. */
: "+g"(h), "+r"(pixels), "+r"(block) |
|
|
|
/* The cast binds to the constant 3, so it is applied before the multiplication. */
: "r" ((x86_reg)line_size), "r"((x86_reg)3*line_size) |
|
|
|
/* NOTE(review): mm0-mm3 and mm6 are written above but are not listed as clobbers. */
: "memory" |
|
|
|
); |
|
|
|
} |
|
|
|
|
|
|
|
static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
|
|
|
{ |
|
|
|
__asm__ volatile( |
|
|
|
@@ -650,6 +693,44 @@ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, in |
|
|
|
:"%"REG_a, "memory"); |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Write an 8-byte-wide block where each output row is derived from a source
 * row and the row directly below it (the bytes at pixels+y*line_size and
 * pixels+(y+1)*line_size).
 *
 * Every source row is bit-inverted, adjacent rows are combined with PAVGB, and
 * the result is bit-inverted again before it is stored.
 * NOTE(review): PAVGB and DEF are macros defined outside this view. If PAVGB is
 * the round-up packed byte average ((a+b+1)>>1), the invert/average/invert
 * sequence yields the round-down average (a+b)>>1 - TODO confirm.
 *
 * @param block     destination; 8 bytes are written per row
 * @param pixels    source; 8 bytes are read per row, and one row more is read
 *                  than is written (one before the loop, four per iteration)
 * @param line_size byte distance between consecutive rows, used for both
 *                  source and destination
 * @param h         row counter; four rows are handled per loop iteration and
 *                  the body runs once before h is first tested, so at least
 *                  four rows are always written
 */
static void DEF(put_no_rnd_pixels8_y2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
|
|
|
{ |
|
|
|
/* Operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size, %4 = 3*line_size. */
__asm__ volatile ( |
|
|
|
/* Prologue: load the first source row into mm0. */
"movq (%1), %%mm0 \n\t" |
|
|
|
/* Comparing mm6 with itself sets every bit: mm6 is the inversion mask. */
"pcmpeqb %%mm6, %%mm6 \n\t" |
|
|
|
/* pixels now points at the second source row. */
"add %3, %1 \n\t" |
|
|
|
/* mm0 enters the loop holding the inverted previous row. */
"pxor %%mm6, %%mm0 \n\t" |
|
|
|
"1: \n\t" |
|
|
|
/* Load the next two source rows and invert them. */
"movq (%1), %%mm1 \n\t" |
|
|
|
"movq (%1, %3), %%mm2 \n\t" |
|
|
|
"pxor %%mm6, %%mm1 \n\t" |
|
|
|
"pxor %%mm6, %%mm2 \n\t" |
|
|
|
/* AT&T operand order: mm0 = previous row with mm1, then mm1 = mm1 with mm2. */
PAVGB" %%mm1, %%mm0 \n\t" |
|
|
|
PAVGB" %%mm2, %%mm1 \n\t" |
|
|
|
/* Invert the two results back; mm2 keeps its inverted source row for reuse below. */
"pxor %%mm6, %%mm0 \n\t" |
|
|
|
"pxor %%mm6, %%mm1 \n\t" |
|
|
|
"movq %%mm0, (%2) \n\t" |
|
|
|
"movq %%mm1, (%2, %3) \n\t" |
|
|
|
/* Load two more source rows, at offsets 2*line_size and 3*line_size (%4). */
"movq (%1, %3,2), %%mm1 \n\t" |
|
|
|
"movq (%1, %4), %%mm0 \n\t" |
|
|
|
"pxor %%mm6, %%mm1 \n\t" |
|
|
|
"pxor %%mm6, %%mm0 \n\t" |
|
|
|
/* mm2 = carried row with mm1, then mm1 = mm1 with mm0. */
PAVGB" %%mm1, %%mm2 \n\t" |
|
|
|
PAVGB" %%mm0, %%mm1 \n\t" |
|
|
|
/* mm0 is not inverted back: it carries the last loaded row into the next iteration. */
"pxor %%mm6, %%mm2 \n\t" |
|
|
|
"pxor %%mm6, %%mm1 \n\t" |
|
|
|
"movq %%mm2, (%2, %3,2) \n\t" |
|
|
|
"movq %%mm1, (%2, %4) \n\t" |
|
|
|
/* Advance source and destination by four rows. */
"lea (%1, %3,4), %1 \n\t" |
|
|
|
"lea (%2, %3,4), %2 \n\t" |
|
|
|
/* h -= 4; repeat while the remaining count is still positive. */
"subl $4, %0 \n\t" |
|
|
|
"jg 1b \n\t" |
|
|
|
/* h, pixels and block are read-write: the asm modifies all three. */
:"+g"(h), "+r"(pixels), "+r" (block) |
|
|
|
/* The cast binds to the constant 3, so it is applied before the multiplication. */
:"r" ((x86_reg)line_size), "r"((x86_reg)3*line_size) |
|
|
|
/* NOTE(review): mm0-mm2 and mm6 are written above but are not listed as clobbers. */
:"memory" |
|
|
|
); |
|
|
|
} |
|
|
|
|
|
|
|
static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
|
|
|
{ |
|
|
|
__asm__ volatile( |
|
|
|
|