|
|
@@ -199,6 +199,56 @@ static inline void mmx_emms(void) |
|
|
|
#endif |
|
|
|
} |
|
|
|
|
|
|
|
static void init_block(DCTELEM block[64], int test, int is_idct, AVLFG *prng) |
|
|
|
{ |
|
|
|
int i, j; |
|
|
|
|
|
|
|
memset(block, 0, 64 * sizeof(*block)); |
|
|
|
|
|
|
|
switch (test) { |
|
|
|
case 0: |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block[i] = (av_lfg_get(prng) % 512) - 256; |
|
|
|
if (is_idct) { |
|
|
|
ff_ref_fdct(block); |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block[i] >>= 3; |
|
|
|
} |
|
|
|
break; |
|
|
|
case 1: |
|
|
|
j = av_lfg_get(prng) % 10 + 1; |
|
|
|
for (i = 0; i < j; i++) |
|
|
|
block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % 512 - 256; |
|
|
|
break; |
|
|
|
case 2: |
|
|
|
block[ 0] = av_lfg_get(prng) % 4096 - 2048; |
|
|
|
block[63] = (block[0] & 1) ^ 1; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
static void permute(DCTELEM dst[64], const DCTELEM src[64], int perm) |
|
|
|
{ |
|
|
|
int i; |
|
|
|
|
|
|
|
if (perm == MMX_PERM) { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
dst[idct_mmx_perm[i]] = src[i]; |
|
|
|
} else if (perm == MMX_SIMPLE_PERM) { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
dst[idct_simple_mmx_perm[i]] = src[i]; |
|
|
|
} else if (perm == SSE2_PERM) { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i]; |
|
|
|
} else if (perm == PARTTRANS_PERM) { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i]; |
|
|
|
} else { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
dst[i] = src[i]; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
static int dct_error(const struct algo *dct, int test, int is_idct, int speed) |
|
|
|
{ |
|
|
|
void (*ref)(DCTELEM *block) = is_idct ? ff_ref_idct : ff_ref_fdct; |
|
|
@@ -219,47 +269,8 @@ static int dct_error(const struct algo *dct, int test, int is_idct, int speed) |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
sysErr[i] = 0; |
|
|
|
for (it = 0; it < NB_ITS; it++) { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block1[i] = 0; |
|
|
|
switch (test) { |
|
|
|
case 0: |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block1[i] = (av_lfg_get(&prng) % 512) - 256; |
|
|
|
if (is_idct) { |
|
|
|
ff_ref_fdct(block1); |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block1[i] >>= 3; |
|
|
|
} |
|
|
|
break; |
|
|
|
case 1: { |
|
|
|
int num = av_lfg_get(&prng) % 10 + 1; |
|
|
|
for (i = 0; i < num; i++) |
|
|
|
block1[av_lfg_get(&prng) % 64] = |
|
|
|
av_lfg_get(&prng) % 512 - 256; |
|
|
|
} |
|
|
|
break; |
|
|
|
case 2: |
|
|
|
block1[0] = av_lfg_get(&prng) % 4096 - 2048; |
|
|
|
block1[63] = (block1[0] & 1) ^ 1; |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
if (dct->format == MMX_PERM) { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block[idct_mmx_perm[i]] = block1[i]; |
|
|
|
} else if (dct->format == MMX_SIMPLE_PERM) { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block[idct_simple_mmx_perm[i]] = block1[i]; |
|
|
|
} else if (dct->format == SSE2_PERM) { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block[(i & 0x38) | idct_sse2_row_perm[i & 7]] = block1[i]; |
|
|
|
} else if (dct->format == PARTTRANS_PERM) { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = block1[i]; |
|
|
|
} else { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block[i] = block1[i]; |
|
|
|
} |
|
|
|
init_block(block1, test, is_idct, &prng); |
|
|
|
permute(block, block1, dct->format); |
|
|
|
|
|
|
|
dct->func(block); |
|
|
|
mmx_emms(); |
|
|
@@ -316,45 +327,14 @@ static int dct_error(const struct algo *dct, int test, int is_idct, int speed) |
|
|
|
return 0; |
|
|
|
|
|
|
|
/* speed test */ |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block1[i] = 0; |
|
|
|
|
|
|
|
switch (test) { |
|
|
|
case 0: |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block1[i] = av_lfg_get(&prng) % 512 - 256; |
|
|
|
if (is_idct) { |
|
|
|
ff_ref_fdct(block1); |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block1[i] >>= 3; |
|
|
|
} |
|
|
|
break; |
|
|
|
case 1: |
|
|
|
case 2: |
|
|
|
block1[0] = av_lfg_get(&prng) % 512 - 256; |
|
|
|
block1[1] = av_lfg_get(&prng) % 512 - 256; |
|
|
|
block1[2] = av_lfg_get(&prng) % 512 - 256; |
|
|
|
block1[3] = av_lfg_get(&prng) % 512 - 256; |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
if (dct->format == MMX_PERM) { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block[idct_mmx_perm[i]] = block1[i]; |
|
|
|
} else if (dct->format == MMX_SIMPLE_PERM) { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block[idct_simple_mmx_perm[i]] = block1[i]; |
|
|
|
} else { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block[i] = block1[i]; |
|
|
|
} |
|
|
|
init_block(block, test, is_idct, &prng); |
|
|
|
permute(block1, block, dct->format); |
|
|
|
|
|
|
|
ti = gettime(); |
|
|
|
it1 = 0; |
|
|
|
do { |
|
|
|
for (it = 0; it < NB_ITS_SPEED; it++) { |
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
block[i] = block1[i]; |
|
|
|
memcpy(block, block1, sizeof(block)); |
|
|
|
dct->func(block); |
|
|
|
} |
|
|
|
it1 += NB_ITS_SPEED; |
|
|
|