* commit '711781d7a1714ea4eb0217eb1ba04811978c43d1': x86: checkasm: check for or handle missing cleanup after MMX instructions. Merged-by: Hendrik Leppkes <h.leppkes@gmail.com> (tags/n3.0)
| @@ -26,6 +26,7 @@ | |||
| #include <stdint.h> | |||
| #include "config.h" | |||
| #include "libavutil/avstring.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/lfg.h" | |||
| #include "libavutil/timer.h" | |||
| @@ -58,6 +59,7 @@ static av_unused void *func_ref, *func_new; | |||
| /* Declare the function prototype. The first argument is the return value, the remaining | |||
| * arguments are the function parameters. Naming parameters is optional. */ | |||
| #define declare_func(ret, ...) declare_new(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__) | |||
| /* Same as declare_func(), except that the checked call is declared through declare_new_emms() using the given cpu_flags mask. */ #define declare_func_emms(cpu_flags, ret, ...) declare_new_emms(cpu_flags, ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__) | |||
| /* Indicate that the current test has failed */ | |||
| #define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__) | |||
| @@ -69,8 +71,12 @@ static av_unused void *func_ref, *func_new; | |||
| #define call_ref(...) ((func_type *)func_ref)(__VA_ARGS__) | |||
| #if ARCH_X86 && HAVE_YASM | |||
| /* Verifies that clobbered callee-saved registers are properly saved and restored */ | |||
| /* Verifies that clobbered callee-saved registers are properly saved and restored | |||
| * and that either no MMX registers are touched or emms is issued */ | |||
| void checkasm_checked_call(void *func, ...); | |||
| /* Verifies that clobbered callee-saved registers are properly saved and restored | |||
| * and issues emms for asm functions which are not required to do so */ | |||
| void checkasm_checked_call_emms(void *func, ...); | |||
| #if ARCH_X86_64 | |||
| /* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit. | |||
| @@ -85,16 +91,24 @@ void checkasm_checked_call(void *func, ...); | |||
| void checkasm_stack_clobber(uint64_t clobber, ...); | |||
| #define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__)\ | |||
| = (void *)checkasm_checked_call; | |||
| /* Declares checked_call and points it at checkasm_checked_call_emms when any bit of cpu_flags is set in av_get_cpu_flags(), otherwise at checkasm_checked_call. */ #define declare_new_emms(cpu_flags, ret, ...) \ | |||
| ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__) = \ | |||
| ((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \ | |||
| (void *)checkasm_checked_call; | |||
| #define CLOB (UINT64_C(0xdeadbeefdeadbeef)) | |||
| #define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\ | |||
| CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\ | |||
| checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__)) | |||
| #elif ARCH_X86_32 | |||
| #define declare_new(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call; | |||
| /* Declares checked_call and points it at checkasm_checked_call_emms when any bit of cpu_flags is set in av_get_cpu_flags(), otherwise at checkasm_checked_call. */ #define declare_new_emms(cpu_flags, ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = \ | |||
| ((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \ | |||
| (void *)checkasm_checked_call; | |||
| #define call_new(...) checked_call(func_new, __VA_ARGS__) | |||
| #endif | |||
| #else | |||
| #define declare_new(ret, ...) | |||
| #define declare_new_emms(cpu_flags, ret, ...) | |||
| /* Call the function */ | |||
| #define call_new(...) ((func_type *)func_new)(__VA_ARGS__) | |||
| #endif | |||
| @@ -144,7 +144,7 @@ static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, | |||
| if (chroma_format == 1) { | |||
| uint8_t *topright = buf0 + 2*16; | |||
| int pred_mode; | |||
| declare_func(void, uint8_t *src, const uint8_t *topright, ptrdiff_t stride); | |||
| declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, const uint8_t *topright, ptrdiff_t stride); | |||
| for (pred_mode = 0; pred_mode < 15; pred_mode++) { | |||
| if (check_pred_func(h->pred4x4[pred_mode], "4x4", pred4x4_modes[codec][pred_mode])) { | |||
| @@ -163,7 +163,7 @@ static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, | |||
| int codec, int chroma_format, int bit_depth) | |||
| { | |||
| int pred_mode; | |||
| declare_func(void, uint8_t *src, ptrdiff_t stride); | |||
| declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride); | |||
| for (pred_mode = 0; pred_mode < 11; pred_mode++) { | |||
| if (check_pred_func(h->pred8x8[pred_mode], (chroma_format == 2) ? "8x16" : "8x8", | |||
| @@ -183,7 +183,7 @@ static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, | |||
| { | |||
| if (chroma_format == 1) { | |||
| int pred_mode; | |||
| declare_func(void, uint8_t *src, ptrdiff_t stride); | |||
| declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride); | |||
| for (pred_mode = 0; pred_mode < 9; pred_mode++) { | |||
| if (check_pred_func(h->pred16x16[pred_mode], "16x16", pred16x16_modes[codec][pred_mode])) { | |||
| @@ -203,7 +203,7 @@ static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, | |||
| { | |||
| if (chroma_format == 1 && codec_ids[codec] == AV_CODEC_ID_H264) { | |||
| int pred_mode; | |||
| declare_func(void, uint8_t *src, int topleft, int topright, ptrdiff_t stride); | |||
| declare_func_emms(AV_CPU_FLAG_MMXEXT, void, uint8_t *src, int topleft, int topright, ptrdiff_t stride); | |||
| for (pred_mode = 0; pred_mode < 12; pred_mode++) { | |||
| if (check_pred_func(h->pred8x8l[pred_mode], "8x8l", pred4x4_modes[codec][pred_mode])) { | |||
| @@ -55,7 +55,7 @@ void checkasm_check_h264qpel(void) | |||
| LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE]); | |||
| H264QpelContext h; | |||
| int op, bit_depth, i, j; | |||
| declare_func(void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride); | |||
| declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride); | |||
| for (op = 0; op < 2; op++) { | |||
| qpel_mc_func (*tab)[16] = op ? h.avg_h264_qpel_pixels_tab : h.put_h264_qpel_pixels_tab; | |||
| @@ -47,7 +47,7 @@ | |||
| #define check_get_pixels(type) \ | |||
| do { \ | |||
| int i; \ | |||
| declare_func(void, int16_t *block, const uint8_t *pixels, ptrdiff_t line_size); \ | |||
| declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block, const uint8_t *pixels, ptrdiff_t line_size); \ | |||
| \ | |||
| for (i = 0; i < BUF_UNITS; i++) { \ | |||
| int src_offset = i * 64 * sizeof(type) + i; /* Test various alignments */ \ | |||
| @@ -64,7 +64,7 @@ | |||
| #define check_diff_pixels(type) \ | |||
| do { \ | |||
| int i; \ | |||
| declare_func(void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride); \ | |||
| declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride); \ | |||
| \ | |||
| for (i = 0; i < BUF_UNITS; i++) { \ | |||
| int src_offset = i * 64 * sizeof(type) + i; /* Test various alignments */ \ | |||
| @@ -54,8 +54,8 @@ static void check_ipred(void) | |||
| LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]); | |||
| VP9DSPContext dsp; | |||
| int tx, mode, bit_depth; | |||
| declare_func(void, uint8_t *dst, ptrdiff_t stride, | |||
| const uint8_t *left, const uint8_t *top); | |||
| declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride, | |||
| const uint8_t *left, const uint8_t *top); | |||
| static const char *const mode_names[N_INTRA_PRED_MODES] = { | |||
| [VERT_PRED] = "vert", | |||
| [HOR_PRED] = "hor", | |||
| @@ -315,7 +315,7 @@ static void check_itxfm(void) | |||
| LOCAL_ALIGNED_32(int16_t, coef, [32 * 32 * 2]); | |||
| LOCAL_ALIGNED_32(int16_t, subcoef0, [32 * 32 * 2]); | |||
| LOCAL_ALIGNED_32(int16_t, subcoef1, [32 * 32 * 2]); | |||
| declare_func(void, uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); | |||
| declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); | |||
| VP9DSPContext dsp; | |||
| int y, x, tx, txtp, bit_depth, sub; | |||
| static const char *const txtp_types[N_TXFM_TYPES] = { | |||
| @@ -553,8 +553,8 @@ static void check_mc(void) | |||
| LOCAL_ALIGNED_32(uint8_t, dst1, [64 * 64 * 2]); | |||
| VP9DSPContext dsp; | |||
| int op, hsize, bit_depth, filter, dx, dy; | |||
| declare_func(void, uint8_t *dst, ptrdiff_t dst_stride, | |||
| const uint8_t *ref, ptrdiff_t ref_stride, | |||
| declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dst_stride, | |||
| const uint8_t *ref, ptrdiff_t ref_stride, | |||
| int h, int mx, int my); | |||
| static const char *const filter_names[4] = { | |||
| "8tap_smooth", "8tap_regular", "8tap_sharp", "bilin" | |||
| @@ -26,6 +26,7 @@ | |||
| SECTION_RODATA | |||
| error_message: db "failed to preserve register", 0 | |||
| error_message_emms: db "failed to issue emms", 0 | |||
| %if ARCH_X86_64 | |||
| ; just random numbers to reduce the chance of incidental match | |||
| @@ -83,11 +84,22 @@ cglobal stack_clobber, 1,2 | |||
| DECLARE_REG_TMP 7 | |||
| %endif | |||
| %macro report_fail 1 | ||| ; report a failure via fail_func using the message whose label is passed as %1
| mov r9, rax | ||| ; stash rax so the call below cannot clobber it
| mov r10, rdx | ||| ; stash rdx likewise
| lea r0, [%1] | ||| ; r0 = address of the message; presumably the first-argument register under x86inc naming - TODO confirm
| xor eax, eax | ||| ; NOTE(review): presumably zeroes the vector-register count for a variadic call - confirm against the ABI
| call fail_func | |||
| mov rdx, r10 | ||| ; restore rdx
| mov rax, r9 | ||| ; restore rax
| %endmacro | |||
| ;----------------------------------------------------------------------------- | |||
| ; void checkasm_checked_call(void *func, ...) | |||
| ;----------------------------------------------------------------------------- | |||
| INIT_XMM | |||
| cglobal checked_call, 2,15,16,max_args*8+8 | |||
| %macro check_call 0-1 | |||
| cglobal checked_call%1, 2,15,16,max_args*8+8 | |||
| mov t0, r0 | |||
| ; All arguments have been pushed on the stack instead of registers in order to | |||
| @@ -154,16 +166,22 @@ cglobal checked_call, 2,15,16,max_args*8+8 | |||
| ; Call fail_func() with a descriptive message to mark it as a failure | |||
| ; if the called function didn't preserve all callee-saved registers. | |||
| ; Save the return value located in rdx:rax first to prevent clobbering. | |||
| jz .ok | |||
| mov r9, rax | |||
| mov r10, rdx | |||
| lea r0, [error_message] | |||
| xor eax, eax | |||
| call fail_func | |||
| mov rdx, r10 | |||
| mov rax, r9 | |||
| .ok: | |||
| jz .clobber_ok | |||
| report_fail error_message | |||
| .clobber_ok: | |||
| %ifnid %1, _emms | |||
| fstenv [rsp] | |||
| mov r9h, [rsp + 8] | |||
| add r9h, 1 | |||
| jz .emms_ok | |||
| report_fail error_message_emms | |||
| emms | |||
| .emms_ok: | |||
| %else | |||
| emms | |||
| %endif | |||
| RET | |||
| %endmacro | |||
| %else | |||
| @@ -173,10 +191,21 @@ cglobal checked_call, 2,15,16,max_args*8+8 | |||
| %define n5 dword 0xb78d0d1d | |||
| %define n6 dword 0x33627ba7 | |||
| %macro report_fail 1 | ||| ; report a failure via fail_func using the message whose label is passed as %1
| mov r3, eax | ||| ; stash eax so the call below cannot clobber it
| mov r4, edx | ||| ; stash edx likewise
| lea r0, [%1] | ||| ; r0 = address of the message
| mov [esp], r0 | ||| ; store the message pointer at the top of the stack; presumably the first stack argument of fail_func - TODO confirm
| call fail_func | |||
| mov edx, r4 | ||| ; restore edx
| mov eax, r3 | ||| ; restore eax
| %endmacro | |||
| %macro check_call 0-1 | |||
| ;----------------------------------------------------------------------------- | |||
| ; void checkasm_checked_call(void *func, ...) | |||
| ;----------------------------------------------------------------------------- | |||
| cglobal checked_call, 1,7 | |||
| cglobal checked_call%1, 1,7 | |||
| mov r3, n3 | |||
| mov r4, n4 | |||
| mov r5, n5 | |||
| @@ -192,16 +221,25 @@ cglobal checked_call, 1,7 | |||
| or r3, r4 | |||
| or r5, r6 | |||
| or r3, r5 | |||
| jz .ok | |||
| mov r3, eax | |||
| mov r4, edx | |||
| lea r0, [error_message] | |||
| mov [esp], r0 | |||
| call fail_func | |||
| mov edx, r4 | |||
| mov eax, r3 | |||
| .ok: | |||
| jz .clobber_ok | |||
| report_fail error_message | |||
| .clobber_ok: | |||
| %ifnid %1, _emms | |||
| fstenv [rsp] | |||
| mov r3h, [rsp + 8] | |||
| add r3h, 1 | |||
| jz .emms_ok | |||
| report_fail error_message_emms | |||
| emms | |||
| .emms_ok: | |||
| %else | |||
| emms | |||
| %endif | |||
| add esp, max_args*4 | |||
| REP_RET | |||
| %endmacro | |||
| %endif ; ARCH_X86_64 | |||
| check_call | |||
| check_call _emms | |||