| @@ -22,6 +22,7 @@ | |||
| #include "checkasm.h" | |||
| #include "libavcodec/avcodec.h" | |||
| #include "libavcodec/h264dsp.h" | |||
| #include "libavcodec/h264data.h" | |||
| #include "libavutil/common.h" | |||
| #include "libavutil/internal.h" | |||
| #include "libavutil/intreadwrite.h" | |||
| @@ -223,10 +224,97 @@ static void check_idct(void) | |||
| } | |||
| } | |||
| } | |||
| report("idct"); | |||
| } | |||
/*
 * Checks the H264DSPContext functions that transform several blocks per
 * call: h264_idct_add16, h264_idct_add16intra and h264_idct8_add4, for bit
 * depths 8, 9 and 10.
 *
 * For every function a 16-pixel-wide, 16-row destination area and a matching
 * set of 16 * 16 coefficients are assembled one sz x sz block at a time.
 * Identical copies are then passed to call_ref() and call_new(), and the
 * test fails if either the destination pixels or the coefficient buffers
 * differ afterwards.
 *
 * check_func(), call_ref(), call_new(), bench_new(), fail(), rnd(),
 * randomize_buffers(), dct4x4(), dct8x8(), SIZEOF_PIXEL, SIZEOF_COEF and
 * PIXEL_STRIDE are defined outside this excerpt; remarks about them below
 * are assumptions to be confirmed against their definitions.
 */
| static void check_idct_multiple(void) | |||
| { | |||
/* Assembled input (dst_full / coef_full) plus one private copy of each for
 * the two calls that are compared (dst0 / coef0 and dst1 / coef1). */
| LOCAL_ALIGNED_16(uint8_t, dst_full, [16 * 16 * 2]); | |||
| LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]); | |||
| LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16 * 2]); | |||
| LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16 * 2]); | |||
| LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]); | |||
| LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]); | |||
/* Per-block values handed to the IDCT functions; written through scan8[]. */
| LOCAL_ALIGNED_16(uint8_t, nnzc, [15*8]); | |||
| H264DSPContext h; | |||
| int bit_depth, i, y, func; | |||
/* Prototype shared by the three functions under test, as consumed by
 * call_ref() / call_new() / bench_new() below. */
| declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]); | |||
| for (bit_depth = 8; bit_depth <= 10; bit_depth++) { | |||
/* Set up the context for this bit depth; the function pointers are read
 * from it in the switch below. */
| ff_h264dsp_init(&h, bit_depth, 1); | |||
| for (func = 0; func < 3; func++) { | |||
| void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL; | |||
| const char *name; | |||
/* sz is the block edge length; intra enables the special nnzc rule applied
 * further down. */
| int sz = 4, intra = 0; | |||
/* Byte offset of each block inside the destination, indexed by i / 16. */
| int block_offset[16] = { 0 }; | |||
/* Pick the function under test and its reported name. func only takes the
 * values 0..2 here, so name is always assigned despite the missing default. */
| switch (func) { | |||
| case 0: | |||
| idct = h.h264_idct_add16; | |||
| name = "h264_idct_add16"; | |||
| break; | |||
| case 1: | |||
| idct = h.h264_idct_add16intra; | |||
| name = "h264_idct_add16intra"; | |||
| intra = 1; | |||
| break; | |||
| case 2: | |||
| idct = h.h264_idct8_add4; | |||
| name = "h264_idct8_add4"; | |||
| sz = 8; | |||
| break; | |||
| } | |||
/* Start from all-zero flags and coefficients, so blocks that copy fewer
 * than sz * sz coefficients below keep zeros in the remainder. */
| memset(nnzc, 0, 15 * 8); | |||
| memset(coef_full, 0, 16 * 16 * SIZEOF_COEF); | |||
/* i counts coefficients, so each iteration handles one sz x sz block. */
| for (i = 0; i < 16 * 16; i += sz * sz) { | |||
/* Per-block scratch buffers. src is not referenced directly in this
 * function; presumably randomize_buffers() fills src, dst and coef using
 * sz and bit_depth - confirm against the macro definition.
 * NOTE(review): unlike the buffers at the top of the function these are
 * plain stack arrays without LOCAL_ALIGNED_16; confirm that
 * randomize_buffers() and dct4x4()/dct8x8() do not need stricter alignment. */
| uint8_t src[8 * 8 * 2]; | |||
| uint8_t dst[8 * 8 * 2]; | |||
| int16_t coef[8 * 8 * 2]; | |||
/* (block_x, block_y) is the block's top-left pixel when the blocks are laid
 * out in raster order over a 16-pixel-wide area:
 *   sz == 4: block_x cycles 0, 4, 8, 12 and block_y advances by 4 per row;
 *   sz == 8: block_x cycles 0, 8 and block_y advances by 8 per row. */
| int index = i / sz; | |||
| int block_y = (index / 16) * sz; | |||
| int block_x = index % 16; | |||
/* Offset of that pixel within dst_full, scaled by SIZEOF_PIXEL. */
| int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL; | |||
/* Selects one of three cases (assuming rnd() is non-negative): 0, 1, or,
 * after the adjustment below, all sz * sz coefficients. This draw happens
 * before randomize_buffers(). */
| int nnz = rnd() % 3; | |||
| randomize_buffers(); | |||
| if (sz == 4) | |||
| dct4x4(coef, bit_depth); | |||
| else | |||
| dct8x8(coef, bit_depth); | |||
/* Copy the sz rows of dst (PIXEL_STRIDE apart) into this block's position
 * in dst_full, whose rows are 16 * SIZEOF_PIXEL apart. */
| for (y = 0; y < sz; y++) | |||
| memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL], | |||
| &dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL); | |||
| if (nnz > 1) | |||
| nnz = sz*sz; | |||
/* Copy only the first nnz coefficients of this block; the destination index
 * is i rescaled from SIZEOF_COEF units to int16_t elements. */
| memcpy(&coef_full[i*SIZEOF_COEF/sizeof(coef[0])], | |||
| coef, nnz * SIZEOF_COEF); | |||
/* For the intra variant a block holding only its first coefficient is
 * flagged with 0 even though that coefficient was copied above; presumably
 * h264_idct_add16intra detects this case from the coefficient itself -
 * confirm against its implementation. */
| if (intra && nnz == 1) | |||
| nnz = 0; | |||
/* i / 16 is the block position counted in units of 16 coefficients; with
 * sz == 8 only entries 0, 4, 8 and 12 are written. */
| nnzc[scan8[i / 16]] = nnz; | |||
| block_offset[i / 16] = offset; | |||
| } | |||
/* The comparison runs only when check_func() yields a non-zero value for
 * the name "<name>_<bit_depth>bpp". */
| if (check_func(idct, "%s_%dbpp", name, bit_depth)) { | |||
/* Give each call its own copy of the coefficients and the destination, so
 * modifications made by one call cannot affect the other. */
| memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF); | |||
| memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF); | |||
| memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL); | |||
| memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL); | |||
/* Both calls receive the same offsets, stride (16 * SIZEOF_PIXEL) and nnzc. */
| call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc); | |||
| call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc); | |||
/* Pixels and coefficient buffers must both match after the calls. */
| if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) || | |||
| memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF)) | |||
| fail(); | |||
/* Re-runs on the buffers already processed by call_new(); presumably only
 * the timing matters here - confirm against bench_new(). */
| bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc); | |||
| } | |||
| } | |||
| } | |||
| } | |||
/*
 * Entry point for the H.264 DSP checks: runs check_idct(), then
 * check_idct_multiple(), and finally calls report("idct").
 *
 * report() is defined outside this excerpt; presumably it summarises the
 * checks run since the previous report under the given name - confirm.
 * NOTE(review): check_idct() as shown in this excerpt also ends with
 * report("idct"); verify that the group is not reported twice.
 */
| void checkasm_check_h264dsp(void) | |||
| { | |||
| check_idct(); | |||
| check_idct_multiple(); | |||
| report("idct"); | |||
| } | |||