|  |  | @@ -22,6 +22,7 @@ | 
		
	
		
			
			|  |  |  | #include "checkasm.h" | 
		
	
		
			
			|  |  |  | #include "libavcodec/avcodec.h" | 
		
	
		
			
			|  |  |  | #include "libavcodec/h264dsp.h" | 
		
	
		
			
			|  |  |  | #include "libavcodec/h264data.h" | 
		
	
		
			
			|  |  |  | #include "libavutil/common.h" | 
		
	
		
			
			|  |  |  | #include "libavutil/internal.h" | 
		
	
		
			
			|  |  |  | #include "libavutil/intreadwrite.h" | 
		
	
	
		
			
				|  |  | @@ -223,10 +224,97 @@ static void check_idct(void) | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | report("idct"); | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | static void check_idct_multiple(void) | 
		
	
		
			
			|  |  |  | { | 
		
	
		
			
			|  |  |  | LOCAL_ALIGNED_16(uint8_t, dst_full,   [16 * 16 * 2]); | 
		
	
		
			
			|  |  |  | LOCAL_ALIGNED_16(int16_t, coef_full,  [16 * 16 * 2]); | 
		
	
		
			
			|  |  |  | LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16 * 2]); | 
		
	
		
			
			|  |  |  | LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16 * 2]); | 
		
	
		
			
			|  |  |  | LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]); | 
		
	
		
			
			|  |  |  | LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]); | 
		
	
		
			
			|  |  |  | LOCAL_ALIGNED_16(uint8_t, nnzc, [15*8]); | 
		
	
		
			
			|  |  |  | H264DSPContext h; | 
		
	
		
			
			|  |  |  | int bit_depth, i, y, func; | 
		
	
		
			
			|  |  |  | declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]); | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | for (bit_depth = 8; bit_depth <= 10; bit_depth++) { | 
		
	
		
			
			|  |  |  | ff_h264dsp_init(&h, bit_depth, 1); | 
		
	
		
			
			|  |  |  | for (func = 0; func < 3; func++) { | 
		
	
		
			
			|  |  |  | void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL; | 
		
	
		
			
			|  |  |  | const char *name; | 
		
	
		
			
			|  |  |  | int sz = 4, intra = 0; | 
		
	
		
			
			|  |  |  | int block_offset[16] = { 0 }; | 
		
	
		
			
			|  |  |  | switch (func) { | 
		
	
		
			
			|  |  |  | case 0: | 
		
	
		
			
			|  |  |  | idct = h.h264_idct_add16; | 
		
	
		
			
			|  |  |  | name = "h264_idct_add16"; | 
		
	
		
			
			|  |  |  | break; | 
		
	
		
			
			|  |  |  | case 1: | 
		
	
		
			
			|  |  |  | idct = h.h264_idct_add16intra; | 
		
	
		
			
			|  |  |  | name = "h264_idct_add16intra"; | 
		
	
		
			
			|  |  |  | intra = 1; | 
		
	
		
			
			|  |  |  | break; | 
		
	
		
			
			|  |  |  | case 2: | 
		
	
		
			
			|  |  |  | idct = h.h264_idct8_add4; | 
		
	
		
			
			|  |  |  | name = "h264_idct8_add4"; | 
		
	
		
			
			|  |  |  | sz = 8; | 
		
	
		
			
			|  |  |  | break; | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | memset(nnzc, 0, 15 * 8); | 
		
	
		
			
			|  |  |  | memset(coef_full, 0, 16 * 16 * SIZEOF_COEF); | 
		
	
		
			
			|  |  |  | for (i = 0; i < 16 * 16; i += sz * sz) { | 
		
	
		
			
			|  |  |  | uint8_t src[8 * 8 * 2]; | 
		
	
		
			
			|  |  |  | uint8_t dst[8 * 8 * 2]; | 
		
	
		
			
			|  |  |  | int16_t coef[8 * 8 * 2]; | 
		
	
		
			
			|  |  |  | int index = i / sz; | 
		
	
		
			
			|  |  |  | int block_y = (index / 16) * sz; | 
		
	
		
			
			|  |  |  | int block_x = index % 16; | 
		
	
		
			
			|  |  |  | int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL; | 
		
	
		
			
			|  |  |  | int nnz = rnd() % 3; | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | randomize_buffers(); | 
		
	
		
			
			|  |  |  | if (sz == 4) | 
		
	
		
			
			|  |  |  | dct4x4(coef, bit_depth); | 
		
	
		
			
			|  |  |  | else | 
		
	
		
			
			|  |  |  | dct8x8(coef, bit_depth); | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | for (y = 0; y < sz; y++) | 
		
	
		
			
			|  |  |  | memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL], | 
		
	
		
			
			|  |  |  | &dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL); | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if (nnz > 1) | 
		
	
		
			
			|  |  |  | nnz = sz*sz; | 
		
	
		
			
			|  |  |  | memcpy(&coef_full[i*SIZEOF_COEF/sizeof(coef[0])], | 
		
	
		
			
			|  |  |  | coef, nnz * SIZEOF_COEF); | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if (intra && nnz == 1) | 
		
	
		
			
			|  |  |  | nnz = 0; | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | nnzc[scan8[i / 16]] = nnz; | 
		
	
		
			
			|  |  |  | block_offset[i / 16] = offset; | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if (check_func(idct, "%s_%dbpp", name, bit_depth)) { | 
		
	
		
			
			|  |  |  | memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF); | 
		
	
		
			
			|  |  |  | memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF); | 
		
	
		
			
			|  |  |  | memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL); | 
		
	
		
			
			|  |  |  | memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL); | 
		
	
		
			
			|  |  |  | call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc); | 
		
	
		
			
			|  |  |  | call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc); | 
		
	
		
			
			|  |  |  | if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) || | 
		
	
		
			
			|  |  |  | memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF)) | 
		
	
		
			
			|  |  |  | fail(); | 
		
	
		
			
			|  |  |  | bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc); | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			/*
			 * Run the H.264 DSP IDCT checks: check_idct() first, then
			 * check_idct_multiple(), followed by a report under the name "idct".
			 */
			void checkasm_check_h264dsp(void)
			{
			    check_idct();
			    check_idct_multiple();

			    /* Issued after both check functions have run. */
			    report("idct");
			}