* commit 'fa8fcab1e0d31074c0644c4ac5194474c6c26415':
  x86: h264_chromamc_10bit: drop pointless PAVG %define
  x86: mmx2 ---> mmxext in function names
  swscale: do not forget to swap data in formats with different endianness

Conflicts:
	libavcodec/x86/dsputil_mmx.c
	libavfilter/x86/gradfun.c
	libswscale/input.c
	libswscale/utils.c
	libswscale/x86/swscale.c
	tests/ref/lavfi/pixfmts_scale

Merged-by: Michael Niedermayer <michaelni@gmx.at>
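Context for the swscale hunks near the end of this diff: when the input pixel format's endianness differs from the host's, every 16-bit plane must be byte-swapped on read, and the old dispatch missed some planes (the chroma planes of YUV444P16 and the alpha plane of the YUVA formats). Below is a minimal, self-contained sketch of what such a byte-swapping input routine does; the helper names are illustrative, not the actual bswap16Y_c/bswap16UV_c from libswscale/input.c.

/* Hypothetical stand-in for a bswap16*_c input function: convert one
 * line of 16-bit samples from the opposite endianness to native order. */
#include <stdint.h>
#include <stdio.h>

static uint16_t swap16(uint16_t x)
{
    return (uint16_t)((x >> 8) | (x << 8));
}

static void bswap16_line(uint16_t *dst, const uint16_t *src, int width)
{
    for (int i = 0; i < width; i++)
        dst[i] = swap16(src[i]);
}

int main(void)
{
    const uint16_t src[4] = { 0x1234, 0xABCD, 0x0001, 0xFF00 };
    uint16_t dst[4];

    bswap16_line(dst, src, 4);
    for (int i = 0; i < 4; i++)  /* expect 0x3412, 0xCDAB, 0x0100, 0x00FF */
        printf("0x%04X -> 0x%04X\n", src[i], dst[i]);
    return 0;
}

The rest of the merge is the mechanical mmx2 -> mmxext rename plus the removal of the now-pointless PAVG %define; neither changes behavior.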
@@ -86,7 +86,7 @@ static const struct algo fdct_tab[] = {
 #if HAVE_MMX_INLINE
     { "MMX",    ff_fdct_mmx,    NO_PERM, AV_CPU_FLAG_MMX },
-    { "MMXEXT", ff_fdct_mmx2,   NO_PERM, AV_CPU_FLAG_MMXEXT },
+    { "MMXEXT", ff_fdct_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT },
     { "SSE2",   ff_fdct_sse2,   NO_PERM, AV_CPU_FLAG_SSE2 },
 #endif
@@ -131,7 +131,7 @@ static const struct algo idct_tab[] = {
 #endif
     { "SIMPLE-MMX",  ff_simple_idct_mmx,  MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
     { "XVID-MMX",    ff_idct_xvid_mmx,    NO_PERM, AV_CPU_FLAG_MMX,    1 },
-    { "XVID-MMXEXT", ff_idct_xvid_mmx2,   NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },
+    { "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },
     { "XVID-SSE2",   ff_idct_xvid_sse2,   SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
 #if ARCH_X86_64 && HAVE_YASM
     { "PR-SSE2",     ff_prores_idct_put_10_sse2_wrap, TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 },
@@ -52,7 +52,7 @@ void ff_j_rev_dct1 (DCTELEM *data);
 void ff_wmv2_idct_c(DCTELEM *data);
 void ff_fdct_mmx(DCTELEM *block);
-void ff_fdct_mmx2(DCTELEM *block);
+void ff_fdct_mmxext(DCTELEM *block);
 void ff_fdct_sse2(DCTELEM *block);
 #define H264_IDCT(depth) \
@@ -438,21 +438,22 @@ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, ui
 #endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */
 #if HAVE_MMXEXT_INLINE
-QPEL_CAVS(put_, PUT_OP, mmx2)
-QPEL_CAVS(avg_,AVG_MMXEXT_OP, mmx2)
+QPEL_CAVS(put_, PUT_OP, mmxext)
+QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext)
-CAVS_MC(put_, 8, mmx2)
-CAVS_MC(put_, 16,mmx2)
-CAVS_MC(avg_, 8, mmx2)
-CAVS_MC(avg_, 16,mmx2)
+CAVS_MC(put_, 8, mmxext)
+CAVS_MC(put_, 16, mmxext)
+CAVS_MC(avg_, 8, mmxext)
+CAVS_MC(avg_, 16, mmxext)
-static void ff_cavsdsp_init_mmx2(CAVSDSPContext* c, AVCodecContext *avctx) {
+static void ff_cavsdsp_init_mmxext(CAVSDSPContext *c, AVCodecContext *avctx)
+{
 #define dspfunc(PFX, IDX, NUM) \
-    c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
-    c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \
-    c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \
-    c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \
-    c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; \
+    c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmxext; \
+    c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmxext; \
+    c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmxext; \
+    c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmxext; \
     dspfunc(put_cavs_qpel, 0, 16);
     dspfunc(put_cavs_qpel, 1, 8);
@@ -475,7 +476,7 @@ CAVS_MC(avg_, 16,3dnow)
 static void ff_cavsdsp_init_3dnow(CAVSDSPContext* c, AVCodecContext *avctx) {
 #define dspfunc(PFX, IDX, NUM) \
-    c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; \
     c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
     c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
     c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
@@ -496,7 +497,7 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
     int mm_flags = av_get_cpu_flags();
 #if HAVE_MMXEXT_INLINE
-    if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmx2(c, avctx);
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmxext(c, avctx);
 #endif /* HAVE_MMXEXT_INLINE */
 #if HAVE_AMD3DNOW_INLINE
     if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx);
@@ -78,7 +78,7 @@ void ff_diracdsp_init_mmx(DiracDSPContext* c)
 #if HAVE_MMXEXT_INLINE
     if (mm_flags & AV_CPU_FLAG_MMX2) {
-        PIXFUNC(avg, 0, mmx2);
+        PIXFUNC(avg, 0, mmxext);
     }
 #endif
@@ -27,7 +27,7 @@ void ff_diracdsp_init_mmx(DiracDSPContext* c);
 DECL_DIRAC_PIXOP(put, mmx);
 DECL_DIRAC_PIXOP(avg, mmx);
-DECL_DIRAC_PIXOP(avg, mmx2);
+DECL_DIRAC_PIXOP(avg, mmxext);
 void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
 void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
@@ -208,7 +208,7 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
 /***********************************/
 /* MMXEXT specific */
-#define DEF(x) x ## _mmx2
+#define DEF(x) x ## _mmxext
 /* Introduced only in MMXEXT set */
 #define PAVGB "pavgb"
@@ -222,11 +222,11 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
 #define put_no_rnd_pixels16_mmx put_pixels16_mmx
 #define put_no_rnd_pixels8_mmx put_pixels8_mmx
-#define put_pixels16_mmx2 put_pixels16_mmx
-#define put_pixels8_mmx2 put_pixels8_mmx
-#define put_pixels4_mmx2 put_pixels4_mmx
-#define put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx
-#define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx
+#define put_pixels16_mmxext put_pixels16_mmx
+#define put_pixels8_mmxext put_pixels8_mmx
+#define put_pixels4_mmxext put_pixels4_mmx
+#define put_no_rnd_pixels16_mmxext put_no_rnd_pixels16_mmx
+#define put_no_rnd_pixels8_mmxext put_no_rnd_pixels8_mmx
 #define put_pixels16_3dnow put_pixels16_mmx
 #define put_pixels8_3dnow put_pixels8_mmx
 #define put_pixels4_3dnow put_pixels4_mmx
@@ -944,11 +944,11 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
     OP(%%mm5, out, %%mm7, d)
 #define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMXEXT, OP_3DNOW) \
-static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \
-                                                  uint8_t *src, \
-                                                  int dstStride, \
-                                                  int srcStride, \
-                                                  int h) \
+static void OPNAME ## mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, \
+                                                    uint8_t *src, \
+                                                    int dstStride, \
+                                                    int srcStride, \
+                                                    int h) \
 { \
     uint64_t temp; \
 \
@@ -1138,11 +1138,11 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, \
     } \
 } \
 \
-static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, \
-                                                 uint8_t *src, \
-                                                 int dstStride, \
-                                                 int srcStride, \
-                                                 int h) \
+static void OPNAME ## mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, \
+                                                   uint8_t *src, \
+                                                   int dstStride, \
+                                                   int srcStride, \
+                                                   int h) \
 { \
     __asm__ volatile ( \
         "pxor %%mm7, %%mm7 \n\t" \
@@ -1775,9 +1775,9 @@ QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP)
 QPEL_OP(put_, ff_pw_16, _, PUT_OP, 3dnow)
 QPEL_OP(avg_, ff_pw_16, _, AVG_3DNOW_OP, 3dnow)
 QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)
-QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmx2)
-QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmx2)
-QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
+QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmxext)
+QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmxext)
+QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmxext)
 /***********************************/
 /* bilinear qpel: not compliant to any spec, only for -lavdopts fast */
@@ -1831,10 +1831,10 @@ QPEL_2TAP_L3(OPNAME, SIZE, MMX, 31, 1, stride, -1) \
 QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride, -stride, 1) \
 QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride + 1, -stride, -1) \
-QPEL_2TAP(put_, 16, mmx2)
-QPEL_2TAP(avg_, 16, mmx2)
-QPEL_2TAP(put_, 8, mmx2)
-QPEL_2TAP(avg_, 8, mmx2)
+QPEL_2TAP(put_, 16, mmxext)
+QPEL_2TAP(avg_, 16, mmxext)
+QPEL_2TAP(put_, 8, mmxext)
+QPEL_2TAP(avg_, 8, mmxext)
 QPEL_2TAP(put_, 16, 3dnow)
 QPEL_2TAP(avg_, 16, 3dnow)
 QPEL_2TAP(put_, 8, 3dnow)
@@ -2099,7 +2099,7 @@ static void name(void *mem, int stride, int h) \
     } while (--h); \
 }
-PREFETCH(prefetch_mmx2, prefetcht0)
+PREFETCH(prefetch_mmxext, prefetcht0)
 PREFETCH(prefetch_3dnow, prefetch)
 #undef PREFETCH
@@ -2153,22 +2153,22 @@ CHROMA_MC(avg, 8, 10, avx)
 #if HAVE_INLINE_ASM
 /* CAVS-specific */
-void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
+void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
 {
     put_pixels8_mmx(dst, src, stride, 8);
 }
-void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
+void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
 {
     avg_pixels8_mmx(dst, src, stride, 8);
 }
-void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
+void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
 {
     put_pixels16_mmx(dst, src, stride, 16);
 }
-void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
+void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
 {
     avg_pixels16_mmx(dst, src, stride, 16);
 }
@@ -2180,10 +2180,10 @@ void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
     put_pixels8_mmx(dst, src, stride, 8);
 }
-void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src,
-                                int stride, int rnd)
+void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
+                                  int stride, int rnd)
 {
-    avg_pixels8_mmx2(dst, src, stride, 8);
+    avg_pixels8_mmxext(dst, src, stride, 8);
 }
 /* only used in VP3/5/6 */
@@ -2242,7 +2242,7 @@ void ff_ ## OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5
 DIRAC_PIXOP(put, mmx)
 DIRAC_PIXOP(avg, mmx)
-DIRAC_PIXOP(avg, mmx2)
+DIRAC_PIXOP(avg, mmxext)
 void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
 {
@@ -2620,68 +2620,68 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
 }
-static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
-                              int mm_flags)
+static void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
+                                int mm_flags)
 {
     const int bit_depth = avctx->bits_per_raw_sample;
     const int high_bit_depth = bit_depth > 8;
 #if HAVE_INLINE_ASM
-    c->prefetch = prefetch_mmx2;
+    c->prefetch = prefetch_mmxext;
     if (!high_bit_depth) {
-        c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
-        c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
+        c->put_pixels_tab[0][1] = put_pixels16_x2_mmxext;
+        c->put_pixels_tab[0][2] = put_pixels16_y2_mmxext;
-        c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
-        c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
-        c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
+        c->avg_pixels_tab[0][0] = avg_pixels16_mmxext;
+        c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext;
+        c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext;
-        c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
-        c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
+        c->put_pixels_tab[1][1] = put_pixels8_x2_mmxext;
+        c->put_pixels_tab[1][2] = put_pixels8_y2_mmxext;
-        c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
-        c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
-        c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
+        c->avg_pixels_tab[1][0] = avg_pixels8_mmxext;
+        c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmxext;
+        c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmxext;
     }
     if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
         if (!high_bit_depth) {
-            c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
-            c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
-            c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
-            c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
+            c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
+            c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
+            c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmxext;
+            c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmxext;
-            c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
-            c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+            c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
+            c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmxext;
         }
     }
     if (CONFIG_VP3_DECODER && (avctx->codec_id == AV_CODEC_ID_VP3 ||
                                avctx->codec_id == AV_CODEC_ID_THEORA)) {
-        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2;
-        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
+        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmxext;
+        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmxext;
     }
 #endif /* HAVE_INLINE_ASM */
     if (CONFIG_H264QPEL) {
 #if HAVE_INLINE_ASM
-        SET_QPEL_FUNCS(put_qpel,        0, 16, mmx2, );
-        SET_QPEL_FUNCS(put_qpel,        1,  8, mmx2, );
-        SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, );
-        SET_QPEL_FUNCS(put_no_rnd_qpel, 1,  8, mmx2, );
-        SET_QPEL_FUNCS(avg_qpel,        0, 16, mmx2, );
-        SET_QPEL_FUNCS(avg_qpel,        1,  8, mmx2, );
+        SET_QPEL_FUNCS(put_qpel,        0, 16, mmxext, );
+        SET_QPEL_FUNCS(put_qpel,        1,  8, mmxext, );
+        SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
+        SET_QPEL_FUNCS(put_no_rnd_qpel, 1,  8, mmxext, );
+        SET_QPEL_FUNCS(avg_qpel,        0, 16, mmxext, );
+        SET_QPEL_FUNCS(avg_qpel,        1,  8, mmxext, );
 #endif /* HAVE_INLINE_ASM */
         if (!high_bit_depth) {
 #if HAVE_INLINE_ASM
-            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, );
-            SET_QPEL_FUNCS(put_h264_qpel, 1,  8, mmx2, );
-            SET_QPEL_FUNCS(put_h264_qpel, 2,  4, mmx2, );
-            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, );
-            SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, mmx2, );
-            SET_QPEL_FUNCS(avg_h264_qpel, 2,  4, mmx2, );
+            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
+            SET_QPEL_FUNCS(put_h264_qpel, 1,  8, mmxext, );
+            SET_QPEL_FUNCS(put_h264_qpel, 2,  4, mmxext, );
+            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, );
+            SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, mmxext, );
+            SET_QPEL_FUNCS(avg_h264_qpel, 2,  4, mmxext, );
 #endif /* HAVE_INLINE_ASM */
         } else if (bit_depth == 10) {
 #if HAVE_YASM
@@ -2697,10 +2697,10 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
         }
 #if HAVE_INLINE_ASM
-        SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, );
-        SET_QPEL_FUNCS(put_2tap_qpel, 1,  8, mmx2, );
-        SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
-        SET_QPEL_FUNCS(avg_2tap_qpel, 1,  8, mmx2, );
+        SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmxext, );
+        SET_QPEL_FUNCS(put_2tap_qpel, 1,  8, mmxext, );
+        SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmxext, );
+        SET_QPEL_FUNCS(avg_2tap_qpel, 1,  8, mmxext, );
 #endif /* HAVE_INLINE_ASM */
     }
@@ -3041,9 +3041,9 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
                 c->idct = ff_idct_xvid_sse2;
                 c->idct_permutation_type = FF_SSE2_IDCT_PERM;
             } else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
-                c->idct_put = ff_idct_xvid_mmx2_put;
-                c->idct_add = ff_idct_xvid_mmx2_add;
-                c->idct     = ff_idct_xvid_mmx2;
+                c->idct_put = ff_idct_xvid_mmxext_put;
+                c->idct_add = ff_idct_xvid_mmxext_add;
+                c->idct     = ff_idct_xvid_mmxext;
             } else {
                 c->idct_put = ff_idct_xvid_mmx_put;
                 c->idct_add = ff_idct_xvid_mmx_add;
@@ -3057,7 +3057,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
     }
     if (mm_flags & AV_CPU_FLAG_MMXEXT)
-        dsputil_init_mmx2(c, avctx, mm_flags);
+        dsputil_init_mmxext(c, avctx, mm_flags);
     if (mm_flags & AV_CPU_FLAG_3DNOW)
         dsputil_init_3dnow(c, avctx, mm_flags);
@@ -89,13 +89,13 @@ void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_s
 void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
 void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
-void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
-void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
-void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
-void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride);
+void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride);
 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
-void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, int stride, int rnd);
 void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
 void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
@@ -647,7 +647,9 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si
 }
 #undef SUM
-static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
+static int vsad_intra16_mmxext(void *v, uint8_t *pix, uint8_t *dummy,
+                               int line_size, int h)
+{
     int tmp;
     assert( (((int)pix) & 7) == 0);
@@ -765,7 +767,9 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
 }
 #undef SUM
-static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
+static int vsad16_mmxext(void *v, uint8_t *pix1, uint8_t *pix2,
+                         int line_size, int h)
+{
     int tmp;
     assert( (((int)pix1) & 7) == 0);
@@ -845,7 +849,10 @@ static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src
         dst[i+0] = src1[i+0]-src2[i+0];
 }
-static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
+static void sub_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *src1,
+                                              const uint8_t *src2, int w,
+                                              int *left, int *left_top)
+{
     x86_reg i=0;
     uint8_t l, lt;
@@ -977,7 +984,7 @@ DCT_SAD_FUNC(mmx)
 #define HSUM(a,t,dst) HSUM_MMXEXT(a,t,dst)
 #define MMABS(a,z) MMABS_MMXEXT(a,z)
-DCT_SAD_FUNC(mmx2)
+DCT_SAD_FUNC(mmxext)
 #undef HSUM
 #undef DCT_SAD
@@ -1116,7 +1123,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
         if(mm_flags & AV_CPU_FLAG_SSE2){
             c->fdct = ff_fdct_sse2;
         } else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
-            c->fdct = ff_fdct_mmx2;
+            c->fdct = ff_fdct_mmxext;
         }else{
             c->fdct = ff_fdct_mmx;
         }
@@ -1149,14 +1156,14 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
         if (mm_flags & AV_CPU_FLAG_MMXEXT) {
-            c->sum_abs_dctelem= sum_abs_dctelem_mmx2;
-            c->vsad[4]= vsad_intra16_mmx2;
+            c->sum_abs_dctelem = sum_abs_dctelem_mmxext;
+            c->vsad[4] = vsad_intra16_mmxext;
             if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
-                c->vsad[0] = vsad16_mmx2;
+                c->vsad[0] = vsad16_mmxext;
             }
-            c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2;
+            c->sub_hfyu_median_prediction = sub_hfyu_median_prediction_mmxext;
         }
         if(mm_flags & AV_CPU_FLAG_SSE2){
@@ -440,7 +440,8 @@ static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
     );
 }
-static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
+static av_always_inline void fdct_row_mmxext(const int16_t *in, int16_t *out,
+                                             const int16_t *table)
 {
     __asm__ volatile (
         "pshufw $0x1B, 8(%0), %%mm5 \n\t"
@@ -555,7 +556,7 @@ void ff_fdct_mmx(int16_t *block)
     }
 }
-void ff_fdct_mmx2(int16_t *block)
+void ff_fdct_mmxext(int16_t *block)
 {
     DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
     int16_t *block1= (int16_t*)align_tmp;
@@ -566,7 +567,7 @@ void ff_fdct_mmx2(int16_t *block)
     fdct_col_mmx(block, block1, 4);
     for(i=8;i>0;i--) {
-        fdct_row_mmx2(block1, block, table);
+        fdct_row_mmxext(block1, block, table);
         block1 += 8;
         table += 32;
         block += 8;
@@ -245,7 +245,7 @@ cglobal %1_h264_chroma_mc2_10, 6,7
 %if %0==3
     movq %2, %3
 %endif
-    PAVG %1, %2
+    pavgw %1, %2
 %endmacro
 %define CHROMAMC_AVG NOTHING
@@ -260,7 +260,6 @@ CHROMA_MC4 put
 CHROMA_MC2 put
 %define CHROMAMC_AVG AVG
-%define PAVG pavgw
 INIT_XMM sse2
 CHROMA_MC8 avg
 %if HAVE_AVX_EXTERNAL
@@ -1002,36 +1002,36 @@ static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
     OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\
 }\
-#define put_pixels8_l2_sse2 put_pixels8_l2_mmx2
-#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmx2
-#define put_pixels16_l2_sse2 put_pixels16_l2_mmx2
-#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmx2
-#define put_pixels8_l2_ssse3 put_pixels8_l2_mmx2
-#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmx2
-#define put_pixels16_l2_ssse3 put_pixels16_l2_mmx2
-#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmx2
+#define put_pixels8_l2_sse2 put_pixels8_l2_mmxext
+#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmxext
+#define put_pixels16_l2_sse2 put_pixels16_l2_mmxext
+#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmxext
+#define put_pixels8_l2_ssse3 put_pixels8_l2_mmxext
+#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmxext
+#define put_pixels16_l2_ssse3 put_pixels16_l2_mmxext
+#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmxext
-#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmx2
-#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmx2
-#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmx2
-#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmx2
-#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmx2
-#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmx2
-#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmx2
-#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmx2
+#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmxext
+#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmxext
+#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmxext
+#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmxext
+#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmxext
+#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmxext
+#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmxext
+#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmxext
-#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmx2
-#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmx2
-#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmx2
-#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmx2
+#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmxext
+#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmxext
+#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmxext
+#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmxext
 #define put_h264_qpel8_v_lowpass_ssse3 put_h264_qpel8_v_lowpass_sse2
 #define avg_h264_qpel8_v_lowpass_ssse3 avg_h264_qpel8_v_lowpass_sse2
 #define put_h264_qpel16_v_lowpass_ssse3 put_h264_qpel16_v_lowpass_sse2
 #define avg_h264_qpel16_v_lowpass_ssse3 avg_h264_qpel16_v_lowpass_sse2
-#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmx2
-#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmx2
+#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmxext
+#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmxext
 #define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
 H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
@@ -1045,8 +1045,8 @@ static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
 static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
     avg_pixels16_sse2(dst, src, stride, 16);
 }
-#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmx2
-#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmx2
+#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext
+#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext
 #define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
@@ -1168,8 +1168,8 @@ QPEL_H264(put_, PUT_OP, 3dnow)
 QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
 #undef PAVGB
 #define PAVGB "pavgb"
-QPEL_H264(put_, PUT_OP, mmx2)
-QPEL_H264(avg_,AVG_MMXEXT_OP, mmx2)
+QPEL_H264(put_, PUT_OP, mmxext)
+QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext)
 QPEL_H264_V_XMM(put_, PUT_OP, sse2)
 QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2)
 QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
@@ -1185,7 +1185,7 @@ QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
 #undef PAVGB
 H264_MC_4816(3dnow)
-H264_MC_4816(mmx2)
+H264_MC_4816(mmxext)
 H264_MC_816(H264_MC_V, sse2)
 H264_MC_816(H264_MC_HV, sse2)
 #if HAVE_SSSE3_INLINE
@@ -130,18 +130,17 @@ LF_FUNCS(uint16_t, 10)
 #if ARCH_X86_32 && HAVE_YASM
 LF_FUNC(v8, luma, 8, mmx2)
-static void ff_deblock_v_luma_8_mmx2(uint8_t *pix, int stride, int alpha,
-                                     int beta, int8_t *tc0)
+static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha,
+                                       int beta, int8_t *tc0)
 {
     if ((tc0[0] & tc0[1]) >= 0)
         ff_deblock_v8_luma_8_mmx2(pix + 0, stride, alpha, beta, tc0);
     if ((tc0[2] & tc0[3]) >= 0)
        ff_deblock_v8_luma_8_mmx2(pix + 8, stride, alpha, beta, tc0 + 2);
 }
 LF_IFUNC(v8, luma_intra, 8, mmx2)
-static void ff_deblock_v_luma_intra_8_mmx2(uint8_t *pix, int stride,
-                                           int alpha, int beta)
+static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride,
+                                             int alpha, int beta)
 {
     ff_deblock_v8_luma_intra_8_mmx2(pix + 0, stride, alpha, beta);
     ff_deblock_v8_luma_intra_8_mmx2(pix + 8, stride, alpha, beta);
@@ -247,9 +246,9 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
             c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmx2;
         }
 #if ARCH_X86_32
-        c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmx2;
+        c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmxext;
         c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmx2;
-        c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmx2;
+        c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
         c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmx2;
 #endif /* ARCH_X86_32 */
         c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmx2;
@@ -512,7 +512,8 @@ __asm__ volatile(
 //-----------------------------------------------------------------------------
-void ff_idct_xvid_mmx2(short *block){
+void ff_idct_xvid_mmxext(short *block)
+{
     __asm__ volatile(
             //# Process each row
             DCT_8_INV_ROW_XMM(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1))
@@ -542,15 +543,15 @@ void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block)
     ff_add_pixels_clamped_mmx(block, dest, line_size);
 }
-void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block)
+void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, DCTELEM *block)
 {
-    ff_idct_xvid_mmx2(block);
+    ff_idct_xvid_mmxext(block);
     ff_put_pixels_clamped_mmx(block, dest, line_size);
 }
-void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
+void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, DCTELEM *block)
 {
-    ff_idct_xvid_mmx2(block);
+    ff_idct_xvid_mmxext(block);
     ff_add_pixels_clamped_mmx(block, dest, line_size);
 }
@@ -34,9 +34,9 @@ void ff_idct_xvid_mmx(short *block);
 void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block);
 void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block);
-void ff_idct_xvid_mmx2(short *block);
-void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block);
-void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_idct_xvid_mmxext(short *block);
+void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, DCTELEM *block);
 void ff_idct_xvid_sse2(short *block);
 void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block);
@@ -74,7 +74,8 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
     );
 }
-static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+static inline void sad8_1_mmxext(uint8_t *blk1, uint8_t *blk2,
+                                 int stride, int h)
 {
     __asm__ volatile(
         ".p2align 4 \n\t"
@@ -120,7 +121,8 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
     return ret;
 }
-static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+static inline void sad8_x2a_mmxext(uint8_t *blk1, uint8_t *blk2,
+                                   int stride, int h)
 {
     __asm__ volatile(
         ".p2align 4 \n\t"
@@ -142,7 +144,8 @@ static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h
     );
 }
-static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+static inline void sad8_y2a_mmxext(uint8_t *blk1, uint8_t *blk2,
+                                   int stride, int h)
 {
     __asm__ volatile(
         "movq (%1), %%mm0 \n\t"
@@ -167,7 +170,8 @@ static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h
     );
 }
-static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2,
+                                 int stride, int h)
 {
     __asm__ volatile(
         "movq "MANGLE(bone)", %%mm5 \n\t"
@@ -304,7 +308,7 @@ static inline int sum_mmx(void)
     return ret&0xFFFF;
 }
-static inline int sum_mmx2(void)
+static inline int sum_mmxext(void)
 {
     int ret;
     __asm__ volatile(
@@ -424,7 +428,7 @@ static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride,
 }\
 PIX_SAD(mmx)
-PIX_SAD(mmx2)
+PIX_SAD(mmxext)
 #endif /* HAVE_INLINE_ASM */
@@ -447,19 +451,19 @@ void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
         c->sad[1]= sad8_mmx;
     }
     if (mm_flags & AV_CPU_FLAG_MMXEXT) {
-        c->pix_abs[0][0] = sad16_mmx2;
-        c->pix_abs[1][0] = sad8_mmx2;
+        c->pix_abs[0][0] = sad16_mmxext;
+        c->pix_abs[1][0] = sad8_mmxext;
-        c->sad[0]= sad16_mmx2;
-        c->sad[1]= sad8_mmx2;
+        c->sad[0] = sad16_mmxext;
+        c->sad[1] = sad8_mmxext;
         if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
-            c->pix_abs[0][1] = sad16_x2_mmx2;
-            c->pix_abs[0][2] = sad16_y2_mmx2;
-            c->pix_abs[0][3] = sad16_xy2_mmx2;
-            c->pix_abs[1][1] = sad8_x2_mmx2;
-            c->pix_abs[1][2] = sad8_y2_mmx2;
-            c->pix_abs[1][3] = sad8_xy2_mmx2;
+            c->pix_abs[0][1] = sad16_x2_mmxext;
+            c->pix_abs[0][2] = sad16_y2_mmxext;
+            c->pix_abs[0][3] = sad16_xy2_mmxext;
+            c->pix_abs[1][1] = sad8_x2_mmxext;
+            c->pix_abs[1][2] = sad8_y2_mmxext;
+            c->pix_abs[1][3] = sad8_xy2_mmxext;
         }
     }
     if ((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW) && avctx->codec_id != AV_CODEC_ID_SNOW) {
@@ -47,8 +47,8 @@ extern uint16_t ff_inv_zigzag_direct16[64];
 #define COMPILE_TEMPLATE_SSSE3 0
 #undef RENAME
 #undef RENAMEl
-#define RENAME(a) a ## _MMX2
-#define RENAMEl(a) a ## _mmx2
+#define RENAME(a) a ## _MMXEXT
+#define RENAMEl(a) a ## _mmxext
 #include "mpegvideoenc_template.c"
 #endif /* HAVE_MMXEXT_INLINE */
@@ -92,7 +92,7 @@ void ff_dct_encode_init_x86(MpegEncContext *s)
 #endif
 #if HAVE_MMXEXT_INLINE
     if (INLINE_MMXEXT(mm_flags))
-        s->dct_quantize = dct_quantize_MMX2;
+        s->dct_quantize = dct_quantize_MMXEXT;
 #endif
 #if HAVE_SSE2_INLINE
     if (INLINE_SSE2(mm_flags))
@@ -466,7 +466,10 @@ VC1_MSPEL_MC(avg_)
 static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
     put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
 }\
-static void avg_vc1_mspel_mc ## a ## b ## _mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
+static void avg_vc1_mspel_mc ## a ## b ## _mmxext(uint8_t *dst, \
+                                                  const uint8_t *src, \
+                                                  int stride, int rnd) \
+{ \
     avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
 }
@@ -489,7 +492,8 @@ DECLARE_FUNCTION(3, 1)
 DECLARE_FUNCTION(3, 2)
 DECLARE_FUNCTION(3, 3)
-static void vc1_inv_trans_4x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block)
+static void vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize,
+                                        DCTELEM *block)
 {
     int dc = block[0];
     dc = (17 * dc + 4) >> 3;
@@ -527,7 +531,8 @@ static void vc1_inv_trans_4x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc
     );
 }
-static void vc1_inv_trans_4x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block)
+static void vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize,
+                                        DCTELEM *block)
 {
     int dc = block[0];
     dc = (17 * dc + 4) >> 3;
@@ -588,7 +593,8 @@ static void vc1_inv_trans_4x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc
     );
 }
-static void vc1_inv_trans_8x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block)
+static void vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize,
+                                        DCTELEM *block)
 {
     int dc = block[0];
     dc = ( 3 * dc + 1) >> 1;
@@ -626,7 +632,8 @@ static void vc1_inv_trans_8x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc
     );
 }
-static void vc1_inv_trans_8x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block)
+static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
+                                        DCTELEM *block)
 {
     int dc = block[0];
     dc = (3 * dc + 1) >> 1;
@@ -712,29 +719,29 @@ av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
 av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
 {
-    dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmx2;
-    dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmx2;
+    dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmxext;
+    dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmxext;
-    dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmx2;
-    dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmx2;
-    dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmx2;
-    dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmx2;
+    dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmxext;
+    dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmxext;
+    dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmxext;
+    dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmxext;
 }
 #endif /* HAVE_INLINE_ASM */
@@ -30,7 +30,9 @@ DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F
 DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
 #if HAVE_MMXEXT_INLINE
-static void gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers)
+static void gradfun_filter_line_mmxext(uint8_t *dst, const uint8_t *src, const uint16_t *dc,
+                                       int width, int thresh,
+                                       const uint16_t *dithers)
 {
     intptr_t x;
     if (width & 3) {
@@ -175,7 +177,7 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf)
 #if HAVE_MMXEXT_INLINE
     if (cpu_flags & AV_CPU_FLAG_MMXEXT)
-        gf->filter_line = gradfun_filter_line_mmx2;
+        gf->filter_line = gradfun_filter_line_mmxext;
 #endif
 #if HAVE_SSSE3_INLINE
     if (cpu_flags & AV_CPU_FLAG_SSSE3)
@@ -48,7 +48,7 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x000100010
 #if HAVE_MMXEXT_INLINE
 #undef RENAME
-#define RENAME(a) a ## _mmx2
+#define RENAME(a) a ## _mmxext
 #include "yadif_template.c"
 #endif
@@ -60,7 +60,7 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
 #if HAVE_MMXEXT_INLINE
     if (cpu_flags & AV_CPU_FLAG_MMXEXT)
-        yadif->filter_line = yadif_filter_line_mmx2;
+        yadif->filter_line = yadif_filter_line_mmxext;
 #endif
 #if HAVE_SSE2_INLINE
     if (cpu_flags & AV_CPU_FLAG_SSE2)
@@ -935,6 +935,7 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUV420P14LE:
     case AV_PIX_FMT_YUV420P16LE:
     case AV_PIX_FMT_YUV422P16LE:
+    case AV_PIX_FMT_YUV444P16LE:
     case AV_PIX_FMT_YUVA444P9LE:
     case AV_PIX_FMT_YUVA422P9LE:
@@ -945,7 +946,6 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUVA420P16LE:
     case AV_PIX_FMT_YUVA422P16LE:
    case AV_PIX_FMT_YUVA444P16LE:
-    case AV_PIX_FMT_YUV444P16LE:
         c->chrToYV12 = bswap16UV_c;
         break;
 #else
@@ -1186,6 +1186,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUV422P16LE:
     case AV_PIX_FMT_YUV444P16LE:
+    case AV_PIX_FMT_GRAY16LE:
+        c->lumToYV12 = bswap16Y_c;
+        break;
     case AV_PIX_FMT_YUVA444P9LE:
     case AV_PIX_FMT_YUVA422P9LE:
     case AV_PIX_FMT_YUVA420P9LE:
@@ -1195,8 +1198,8 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUVA420P16LE:
     case AV_PIX_FMT_YUVA422P16LE:
     case AV_PIX_FMT_YUVA444P16LE:
-    case AV_PIX_FMT_GRAY16LE:
         c->lumToYV12 = bswap16Y_c;
+        c->alpToYV12 = bswap16Y_c;
         break;
 #else
     case AV_PIX_FMT_YUV444P9BE:
@@ -1215,6 +1218,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUV422P16BE:
     case AV_PIX_FMT_YUV444P16BE:
+    case AV_PIX_FMT_GRAY16BE:
+        c->lumToYV12 = bswap16Y_c;
+        break;
     case AV_PIX_FMT_YUVA444P9BE:
     case AV_PIX_FMT_YUVA422P9BE:
     case AV_PIX_FMT_YUVA420P9BE:
@@ -1224,8 +1230,8 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUVA420P16BE:
     case AV_PIX_FMT_YUVA422P16BE:
     case AV_PIX_FMT_YUVA444P16BE:
-    case AV_PIX_FMT_GRAY16BE:
         c->lumToYV12 = bswap16Y_c;
+        c->alpToYV12 = bswap16Y_c;
         break;
 #endif
     case AV_PIX_FMT_YUYV422:
@@ -627,8 +627,9 @@ fail:
 }
 #if HAVE_MMXEXT_INLINE
-static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
-                           int16_t *filter, int32_t *filterPos, int numSplits)
+static int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
+                               int16_t *filter, int32_t *filterPos,
+                               int numSplits)
 {
     uint8_t *fragmentA;
     x86_reg imm8OfPShufW1A;
@@ -1107,10 +1108,10 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
 #if HAVE_MMXEXT_INLINE
     // can't downscale !!!
     if (c->canMMXEXTBeUsed && (flags & SWS_FAST_BILINEAR)) {
-        c->lumMmxextFilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL,
-                                                     NULL, NULL, 8);
-        c->chrMmxextFilterCodeSize = initMMX2HScaler(c->chrDstW, c->chrXInc,
-                                                     NULL, NULL, NULL, 4);
+        c->lumMmxextFilterCodeSize = init_hscaler_mmxext(dstW, c->lumXInc, NULL,
+                                                         NULL, NULL, 8);
+        c->chrMmxextFilterCodeSize = init_hscaler_mmxext(c->chrDstW, c->chrXInc,
+                                                         NULL, NULL, NULL, 4);
 #if USE_MMAP
         c->lumMmxextFilterCode = mmap(NULL, c->lumMmxextFilterCodeSize,
@@ -1150,10 +1151,10 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
         FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail);
         FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail);
-        initMMX2HScaler(      dstW, c->lumXInc, c->lumMmxextFilterCode,
-                        c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8);
-        initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode,
-                        c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4);
+        init_hscaler_mmxext(      dstW, c->lumXInc, c->lumMmxextFilterCode,
+                            c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8);
+        init_hscaler_mmxext(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode,
+                            c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4);
 #if USE_MMAP
         mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ);
@@ -102,7 +102,7 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL;
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _MMX2
+#define RENAME(a) a ## _MMXEXT
 #include "rgb2rgb_template.c"
 //SSE2 versions
@@ -142,7 +142,7 @@ av_cold void rgb2rgb_init_x86(void)
     if (INLINE_AMD3DNOW(cpu_flags))
         rgb2rgb_init_3DNOW();
     if (INLINE_MMXEXT(cpu_flags))
-        rgb2rgb_init_MMX2();
+        rgb2rgb_init_MMXEXT();
     if (INLINE_SSE2(cpu_flags))
         rgb2rgb_init_SSE2();
 #endif /* HAVE_INLINE_ASM */
@@ -85,7 +85,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _MMX2
+#define RENAME(a) a ## _MMXEXT
 #include "swscale_template.c"
 #endif
@@ -211,7 +211,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
                           const uint8_t *dither, int offset)
 {
     if(((int)dest) & 15){
-        return yuv2yuvX_MMX2(filter, filterSize, src, dest, dstW, dither, offset);
+        return yuv2yuvX_MMXEXT(filter, filterSize, src, dest, dstW, dither, offset);
     }
     if (offset) {
         __asm__ volatile("movq (%0), %%xmm3\n\t"
@@ -381,7 +381,7 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
         sws_init_swScale_MMX(c);
 #if HAVE_MMXEXT_INLINE
     if (cpu_flags & AV_CPU_FLAG_MMXEXT)
-        sws_init_swScale_MMX2(c);
+        sws_init_swScale_MMXEXT(c);
     if (cpu_flags & AV_CPU_FLAG_SSE3){
        if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
             c->yuv2planeX = yuv2yuvX_sse3;
@@ -63,7 +63,7 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _MMX2
+#define RENAME(a) a ## _MMXEXT
 #include "yuv2rgb_template.c"
 #endif /* HAVE_MMXEXT_INLINE */
@@ -77,8 +77,10 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
 #if HAVE_MMXEXT_INLINE
     if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
        switch (c->dstFormat) {
-        case AV_PIX_FMT_RGB24: return yuv420_rgb24_MMX2;
-        case AV_PIX_FMT_BGR24: return yuv420_bgr24_MMX2;
+        case AV_PIX_FMT_RGB24:
+            return yuv420_rgb24_MMXEXT;
+        case AV_PIX_FMT_BGR24:
+            return yuv420_bgr24_MMXEXT;
         }
     }
 #endif