|
|
|
@@ -30,6 +30,8 @@ |
|
|
|
#include "dsputil_mmx.h" |
|
|
|
|
|
|
|
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
|
|
|
static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) |
|
|
|
{ |
|
|
|
__asm__ volatile( |
|
|
|
@@ -323,8 +325,6 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int |
|
|
|
return tmp; |
|
|
|
} |
|
|
|
|
|
|
|
/* External 16-pixel-wide sum-of-squared-errors; assigned to c->sse[0] under
 * HAVE_YASM + AV_CPU_FLAG_SSE2 below (assembly implementation elsewhere). */
int ff_sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
|
|
|
|
|
|
|
static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) { |
|
|
|
int tmp; |
|
|
|
__asm__ volatile ( |
|
|
|
@@ -925,17 +925,6 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, c |
|
|
|
"paddusw "#t", "#a" \n\t"\ |
|
|
|
"movd "#a", "#dst" \n\t"\ |
|
|
|
|
|
|
|
/* Declare the per-CPU-flavor external hadamard8_diff functions (8-wide and
 * 16-wide variants); these are wired into c->hadamard8_diff[] in
 * ff_dsputilenc_init_mmx below. Implementations live outside this file. */
#define hadamard_func(cpu) \



int ff_hadamard8_diff_##cpu (void *s, uint8_t *src1, uint8_t *src2, \



                          int stride, int h); \



int ff_hadamard8_diff16_##cpu(void *s, uint8_t *src1, uint8_t *src2, \



                          int stride, int h);
|
|
|
|
|
|
|
/* Instantiate the prototypes for each supported instruction-set variant. */
hadamard_func(mmx)



hadamard_func(mmx2)



hadamard_func(sse2)



hadamard_func(ssse3)
|
|
|
|
|
|
|
#define DCT_SAD4(m,mm,o)\ |
|
|
|
"mov"#m" "#o"+ 0(%1), "#mm"2 \n\t"\ |
|
|
|
"mov"#m" "#o"+16(%1), "#mm"3 \n\t"\ |
|
|
|
@@ -1094,10 +1083,26 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si |
|
|
|
#undef PHADDD |
|
|
|
#endif //HAVE_SSSE3 |
|
|
|
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
|
|
|
/* External 16-pixel-wide sum-of-squared-errors; assigned to c->sse[0] under
 * HAVE_YASM + AV_CPU_FLAG_SSE2 below (assembly implementation elsewhere). */
int ff_sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
|
|
|
|
|
|
|
/* Declare the per-CPU-flavor external hadamard8_diff functions (8-wide and
 * 16-wide variants); these are wired into c->hadamard8_diff[] in
 * ff_dsputilenc_init_mmx below. Implementations live outside this file. */
#define hadamard_func(cpu) \



int ff_hadamard8_diff_##cpu (void *s, uint8_t *src1, uint8_t *src2, \



                          int stride, int h); \



int ff_hadamard8_diff16_##cpu(void *s, uint8_t *src1, uint8_t *src2, \



                          int stride, int h);
|
|
|
|
|
|
|
/* Instantiate the prototypes for each supported instruction-set variant. */
hadamard_func(mmx)



hadamard_func(mmx2)



hadamard_func(sse2)



hadamard_func(ssse3)
|
|
|
|
|
|
|
void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) |
|
|
|
{ |
|
|
|
int mm_flags = av_get_cpu_flags(); |
|
|
|
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
int bit_depth = avctx->bits_per_raw_sample; |
|
|
|
|
|
|
|
if (mm_flags & AV_CPU_FLAG_MMX) { |
|
|
|
@@ -1121,11 +1126,6 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) |
|
|
|
c->diff_bytes= diff_bytes_mmx; |
|
|
|
c->sum_abs_dctelem= sum_abs_dctelem_mmx; |
|
|
|
|
|
|
|
#if HAVE_YASM |
|
|
|
c->hadamard8_diff[0]= ff_hadamard8_diff16_mmx; |
|
|
|
c->hadamard8_diff[1]= ff_hadamard8_diff_mmx; |
|
|
|
#endif |
|
|
|
|
|
|
|
c->pix_norm1 = pix_norm1_mmx; |
|
|
|
c->sse[0] = sse16_mmx; |
|
|
|
c->sse[1] = sse8_mmx; |
|
|
|
@@ -1146,10 +1146,6 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) |
|
|
|
|
|
|
|
|
|
|
|
if (mm_flags & AV_CPU_FLAG_MMX2) { |
|
|
|
#if HAVE_YASM |
|
|
|
c->hadamard8_diff[0]= ff_hadamard8_diff16_mmx2; |
|
|
|
c->hadamard8_diff[1]= ff_hadamard8_diff_mmx2; |
|
|
|
#endif |
|
|
|
c->sum_abs_dctelem= sum_abs_dctelem_mmx2; |
|
|
|
c->vsad[4]= vsad_intra16_mmx2; |
|
|
|
|
|
|
|
@@ -1164,13 +1160,6 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) |
|
|
|
if (bit_depth <= 8) |
|
|
|
c->get_pixels = get_pixels_sse2; |
|
|
|
c->sum_abs_dctelem= sum_abs_dctelem_sse2; |
|
|
|
#if HAVE_YASM |
|
|
|
c->sse[0] = ff_sse16_sse2; |
|
|
|
#if HAVE_ALIGNED_STACK |
|
|
|
c->hadamard8_diff[0]= ff_hadamard8_diff16_sse2; |
|
|
|
c->hadamard8_diff[1]= ff_hadamard8_diff_sse2; |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
} |
|
|
|
|
|
|
|
#if HAVE_SSSE3 |
|
|
|
@@ -1180,10 +1169,6 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) |
|
|
|
} |
|
|
|
c->add_8x8basis= add_8x8basis_ssse3; |
|
|
|
c->sum_abs_dctelem= sum_abs_dctelem_ssse3; |
|
|
|
#if HAVE_YASM && HAVE_ALIGNED_STACK |
|
|
|
c->hadamard8_diff[0]= ff_hadamard8_diff16_ssse3; |
|
|
|
c->hadamard8_diff[1]= ff_hadamard8_diff_ssse3; |
|
|
|
#endif |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
@@ -1194,6 +1179,35 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) |
|
|
|
c->add_8x8basis= add_8x8basis_3dnow; |
|
|
|
} |
|
|
|
} |
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
|
|
|
#if HAVE_YASM |
|
|
|
if (mm_flags & AV_CPU_FLAG_MMX) { |
|
|
|
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx; |
|
|
|
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx; |
|
|
|
|
|
|
|
if (mm_flags & AV_CPU_FLAG_MMX2) { |
|
|
|
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2; |
|
|
|
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2; |
|
|
|
} |
|
|
|
|
|
|
|
if (mm_flags & AV_CPU_FLAG_SSE2){ |
|
|
|
c->sse[0] = ff_sse16_sse2; |
|
|
|
|
|
|
|
#if HAVE_ALIGNED_STACK |
|
|
|
c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2; |
|
|
|
c->hadamard8_diff[1] = ff_hadamard8_diff_sse2; |
|
|
|
#endif |
|
|
|
} |
|
|
|
|
|
|
|
#if HAVE_SSSE3 && HAVE_ALIGNED_STACK |
|
|
|
if (mm_flags & AV_CPU_FLAG_SSSE3) { |
|
|
|
c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3; |
|
|
|
c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3; |
|
|
|
} |
|
|
|
#endif |
|
|
|
} |
|
|
|
#endif /* HAVE_YASM */ |
|
|
|
|
|
|
|
ff_dsputil_init_pix_mmx(c, avctx); |
|
|
|
} |