| @@ -40,6 +40,7 @@ | |||
| #include "libavutil/time.h" | |||
| #include "dct.h" | |||
| #include "idctdsp.h" | |||
| #include "simple_idct.h" | |||
| #include "aandcttab.h" | |||
| #include "faandct.h" | |||
| @@ -60,64 +61,63 @@ void ff_simple_idct_neon(int16_t *data); | |||
/*
 * Descriptor for one transform implementation listed in the test tables
 * (fdct_tab / idct_tab) and consumed by dct_error().
 *
 * NOTE(review): this excerpt is a diff hunk rendered without +/- markers, so
 * the `enum formattag ... format` member and the `perm_type` member both
 * appear below. They look like the removed and the added side of the same
 * change -- confirm against the patch which one the final file keeps.
 */
| struct algo { | |||
/* Label of the entry; dct_error() also compares it against "IJG-AAN-INT". */
| const char *name; | |||
/* Transform under test; dct_error() calls it in place on the permuted block. */
| void (*func)(int16_t *block); | |||
/* Coefficient layout tag handed to permute() (formattag variant). */
| enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, | |||
| SSE2_PERM, PARTTRANS_PERM } format; | |||
/* Coefficient layout tag handed to permute() (idct_permutation_type variant). */
| enum idct_permutation_type perm_type; | |||
/*
 * The tables store an AV_CPU_FLAG_* value here, or leave it zero.
 * Presumably used to skip entries the CPU cannot run -- the consumer is not
 * visible in this excerpt, TODO confirm.
 */
| int cpu_flag; | |||
/*
 * Set to 1 only by the XVID-* entries visible in this excerpt; how it is
 * interpreted is not shown here -- TODO confirm against the caller.
 */
| int nonspec; | |||
| }; | |||
/*
 * Table of the fdct implementations to exercise, one struct algo per entry,
 * terminated by an all-zero entry. Entries inside #if blocks are only built
 * when the corresponding HAVE_* configuration macro is non-zero.
 *
 * NOTE(review): this excerpt is a diff hunk rendered without +/- markers, so
 * every entry appears twice: once with a formattag constant (NO_PERM,
 * SCALE_PERM) and once with an FF_IDCT_PERM_* constant. The two sets look
 * like the removed and the added side of one change -- confirm against the
 * patch.
 *
 * NOTE(review): in the FF_IDCT_PERM_* variant "IJG-AAN-INT" no longer has a
 * dedicated tag; dct_error() selects its ff_aanscales post-scaling with
 * strcmp() on the entry name, so renaming that entry would silently disable
 * the scaling.
 */
| static const struct algo fdct_tab[] = { | |||
| { "REF-DBL", ff_ref_fdct, NO_PERM }, | |||
| { "FAAN", ff_faandct, NO_PERM }, | |||
| { "IJG-AAN-INT", ff_fdct_ifast, SCALE_PERM }, | |||
| { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM }, | |||
| { "REF-DBL", ff_ref_fdct, FF_IDCT_PERM_NONE }, | |||
| { "FAAN", ff_faandct, FF_IDCT_PERM_NONE }, | |||
| { "IJG-AAN-INT", ff_fdct_ifast, FF_IDCT_PERM_NONE }, | |||
| { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE }, | |||
| #if HAVE_MMX_INLINE | |||
| { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX }, | |||
| { "MMX", ff_fdct_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX }, | |||
| #endif | |||
| #if HAVE_MMXEXT_INLINE | |||
| { "MMXEXT", ff_fdct_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT }, | |||
| { "MMXEXT", ff_fdct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT }, | |||
| #endif | |||
| #if HAVE_SSE2_INLINE | |||
| { "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 }, | |||
| { "SSE2", ff_fdct_sse2, FF_IDCT_PERM_NONE, AV_CPU_FLAG_SSE2 }, | |||
| #endif | |||
| #if HAVE_ALTIVEC | |||
| { "altivecfdct", ff_fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC }, | |||
| { "altivecfdct", ff_fdct_altivec, FF_IDCT_PERM_NONE, AV_CPU_FLAG_ALTIVEC }, | |||
| #endif | |||
/* All-zero terminator entry. */
| { 0 } | |||
| }; | |||
| static const struct algo idct_tab[] = { | |||
| { "FAANI", ff_faanidct, NO_PERM }, | |||
| { "REF-DBL", ff_ref_idct, NO_PERM }, | |||
| { "INT", ff_j_rev_dct, MMX_PERM }, | |||
| { "SIMPLE-C", ff_simple_idct_8, NO_PERM }, | |||
| { "FAANI", ff_faanidct, FF_IDCT_PERM_NONE }, | |||
| { "REF-DBL", ff_ref_idct, FF_IDCT_PERM_NONE }, | |||
| { "INT", ff_j_rev_dct, FF_IDCT_PERM_LIBMPEG2 }, | |||
| { "SIMPLE-C", ff_simple_idct_8, FF_IDCT_PERM_NONE }, | |||
| #if HAVE_MMX_INLINE | |||
| { "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX }, | |||
| { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 }, | |||
| { "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX }, | |||
| { "XVID-MMX", ff_idct_xvid_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX, 1 }, | |||
| #endif | |||
| #if HAVE_MMXEXT_INLINE | |||
| { "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 }, | |||
| { "XVID-MMXEXT", ff_idct_xvid_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT, 1 }, | |||
| #endif | |||
| #if HAVE_SSE2_INLINE | |||
| { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 }, | |||
| { "XVID-SSE2", ff_idct_xvid_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 }, | |||
| #endif | |||
| #if ARCH_ARM | |||
| { "SIMPLE-ARM", ff_simple_idct_arm, NO_PERM }, | |||
| { "INT-ARM", ff_j_rev_dct_arm, MMX_PERM }, | |||
| { "SIMPLE-ARM", ff_simple_idct_arm, FF_IDCT_PERM_NONE }, | |||
| { "INT-ARM", ff_j_rev_dct_arm, FF_IDCT_PERM_LIBMPEG2 }, | |||
| #endif | |||
| #if HAVE_ARMV5TE | |||
| { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM, AV_CPU_FLAG_ARMV5TE }, | |||
| { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te, FF_IDCT_PERM_NONE, AV_CPU_FLAG_ARMV5TE }, | |||
| #endif | |||
| #if HAVE_ARMV6 | |||
| { "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM, AV_CPU_FLAG_ARMV6 }, | |||
| { "SIMPLE-ARMV6", ff_simple_idct_armv6, FF_IDCT_PERM_LIBMPEG2, AV_CPU_FLAG_ARMV6 }, | |||
| #endif | |||
| #if HAVE_NEON && ARCH_ARM | |||
| { "SIMPLE-NEON", ff_simple_idct_neon, PARTTRANS_PERM, AV_CPU_FLAG_NEON }, | |||
| { "SIMPLE-NEON", ff_simple_idct_neon, FF_IDCT_PERM_PARTTRANS, AV_CPU_FLAG_NEON }, | |||
| #endif | |||
| { 0 } | |||
| @@ -172,25 +172,32 @@ static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng) | |||
| } | |||
| } | |||
/*
 * Copy the 64 entries of src into dst, placing src[i] at a destination index
 * chosen by the permutation tag. Each mapping is applied to every i in
 * 0..63; any tag without a dedicated branch yields a straight copy.
 *
 * NOTE(review): this excerpt is a diff hunk rendered without +/- markers, so
 * two signatures and two dispatch forms are interleaved below: an
 * if / else-if chain on `perm` (formattag constants) and a switch on
 * `perm_type` (FF_IDCT_PERM_* constants). Both forms share the same loop
 * bodies. They look like the removed and the added side of one change --
 * confirm against the patch.
 */
| static void permute(int16_t dst[64], const int16_t src[64], int perm) | |||
| static void permute(int16_t dst[64], const int16_t src[64], | |||
| enum idct_permutation_type perm_type) | |||
| { | |||
| int i; | |||
| if (perm == MMX_PERM) { | |||
| switch (perm_type) { | |||
| case FF_IDCT_PERM_LIBMPEG2: | |||
/* Keep bits 3-5 of i; move bits 1-2 down to bits 0-1 and bit 0 up to bit 2. */
| for (i = 0; i < 64; i++) | |||
| dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i]; | |||
| } else if (perm == MMX_SIMPLE_PERM) { | |||
| break; | |||
| case FF_IDCT_PERM_SIMPLE: | |||
/* Destination index comes from idct_simple_mmx_perm, defined outside this excerpt. */
| for (i = 0; i < 64; i++) | |||
| dst[idct_simple_mmx_perm[i]] = src[i]; | |||
| } else if (perm == SSE2_PERM) { | |||
| break; | |||
| case FF_IDCT_PERM_SSE2: | |||
/* Keep bits 3-5 of i; remap the low three bits through idct_sse2_row_perm. */
| for (i = 0; i < 64; i++) | |||
| dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i]; | |||
| } else if (perm == PARTTRANS_PERM) { | |||
| break; | |||
| case FF_IDCT_PERM_PARTTRANS: | |||
/* Keep bits 2 and 5 of i; swap bits 0-1 with bits 3-4. */
| for (i = 0; i < 64; i++) | |||
| dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i]; | |||
| } else { | |||
| break; | |||
| default: | |||
/* No permutation: plain element-by-element copy. */
| for (i = 0; i < 64; i++) | |||
| dst[i] = src[i]; | |||
| break; | |||
| } | |||
| } | |||
| @@ -215,12 +222,12 @@ static int dct_error(const struct algo *dct, int test, int is_idct, int speed) | |||
| sysErr[i] = 0; | |||
| for (it = 0; it < NB_ITS; it++) { | |||
| init_block(block1, test, is_idct, &prng); | |||
| permute(block, block1, dct->format); | |||
| permute(block, block1, dct->perm_type); | |||
| dct->func(block); | |||
| emms_c(); | |||
| if (dct->format == SCALE_PERM) { | |||
| if (!strcmp(dct->name, "IJG-AAN-INT")) { | |||
| for (i = 0; i < 64; i++) { | |||
| scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i]; | |||
| block[i] = (block[i] * scale) >> AANSCALE_BITS; | |||
| @@ -273,7 +280,7 @@ static int dct_error(const struct algo *dct, int test, int is_idct, int speed) | |||
| /* speed test */ | |||
| init_block(block, test, is_idct, &prng); | |||
| permute(block1, block, dct->format); | |||
| permute(block1, block, dct->perm_type); | |||
| ti = av_gettime(); | |||
| it1 = 0; | |||