It provides the following features: * verify correctness by comparing output to the C version. * detect failure to save and restore clobbered callee-saved registers. * detect 32-bit parameters being used as if they were 64-bit in x86-64 (the upper halves are not guaranteed to be zero - but in practice they very often are, which makes those bugs hard to spot otherwise). * easy benchmarking. Compile by running 'make checkasm'. Execute by running 'tests/checkasm/checkasm'. Optional arguments are '--bench' to run benchmarks for all functions, '--bench=<pattern>' to run benchmarks for all functions whose names start with <pattern>, and '<integer>' to seed the PRNG for reproducible results. Contains unit tests for most h264pred functions to get started; more tests can be added afterwards using those as a reference. Loosely based on code from x264. Currently only supports x86 and x86-64, but additional architectures shouldn't be too much of an obstacle to add. Note that functions with floating point parameters or floating point return values are not supported. Some compiler-specific features or preprocessor hacks would likely be required to add support for that. Signed-off-by: Janne Grunau <janne-libav@jannau.net> tags/n2.8
| @@ -46,6 +46,7 @@ | |||
| /libavutil/avconfig.h | |||
| /tests/audiogen | |||
| /tests/base64 | |||
| /tests/checkasm/checkasm | |||
| /tests/data/ | |||
| /tests/pixfmts.mak | |||
| /tests/rotozoom | |||
| @@ -173,5 +173,7 @@ testclean: | |||
| -include $(wildcard tests/*.d) | |||
| include $(SRC_PATH)/tests/checkasm/Makefile | |||
| .PHONY: fate* lcov lcov-reset | |||
| .INTERMEDIATE: coverage.info | |||
| @@ -0,0 +1,33 @@ | |||
| # libavcodec tests | |||
| AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o | |||
| CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes) | |||
| -include $(SRC_PATH)/tests/checkasm/$(ARCH)/Makefile | |||
| CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o | |||
| CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%)) | |||
| -include $(CHECKASMOBJS:.o=.d) | |||
| CHECKASMDIRS := $(sort $(dir $(CHECKASMOBJS))) | |||
| $(CHECKASMOBJS): | $(CHECKASMDIRS) | |||
| OBJDIRS += $(CHECKASMDIRS) | |||
| # We rely on function pointers intentionally declared without specified argument types. | |||
| tests/checkasm/%.o: CFLAGS := $(CFLAGS:-Wstrict-prototypes=-Wno-strict-prototypes) | |||
| CHECKASM := tests/checkasm/checkasm$(EXESUF) | |||
| $(CHECKASM): $(EXEOBJS) $(CHECKASMOBJS) $(FF_DEP_LIBS) | |||
| $(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_EXTRALIBS) | |||
| checkasm: $(CHECKASM) | |||
| clean:: checkasmclean | |||
| checkasmclean: | |||
| $(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%) | |||
| .PHONY: checkasm | |||
| @@ -0,0 +1,484 @@ | |||
| /* | |||
| * Assembly testing and benchmarking tool | |||
| * Copyright (c) 2015 Henrik Gramner | |||
| * Copyright (c) 2008 Loren Merritt | |||
| * | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or modify | |||
| * it under the terms of the GNU General Public License as published by | |||
| * the Free Software Foundation; either version 2 of the License, or | |||
| * (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
| * GNU General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU General Public License along | |||
| * with Libav; if not, write to the Free Software Foundation, Inc., | |||
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |||
| */ | |||
| #include <stdarg.h> | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include <string.h> | |||
| #include "checkasm.h" | |||
| #include "libavutil/common.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/random_seed.h" | |||
| #if ARCH_X86 | |||
| #include "libavutil/x86/cpu.h" | |||
| #endif | |||
| #if HAVE_SETCONSOLETEXTATTRIBUTE | |||
| #include <windows.h> | |||
| #define COLOR_RED FOREGROUND_RED | |||
| #define COLOR_GREEN FOREGROUND_GREEN | |||
| #define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN) | |||
| #else | |||
| #define COLOR_RED 1 | |||
| #define COLOR_GREEN 2 | |||
| #define COLOR_YELLOW 3 | |||
| #endif | |||
| #if HAVE_UNISTD_H | |||
| #include <unistd.h> | |||
| #endif | |||
| #if !HAVE_ISATTY | |||
| #define isatty(fd) 1 | |||
| #endif | |||
| /* List of test entry points; check_cpu_flag() calls them in order until it reaches the terminating NULL */ | |||
| static void (* const tests[])(void) = { | |||
| #if CONFIG_H264PRED | |||
| checkasm_check_h264pred, | |||
| #endif | |||
| NULL | |||
| }; | |||
| /* List of cpu flags to check; main() enables them cumulatively in array order and stops at the zeroed terminator entry */ | |||
| static const struct { | |||
| const char *name; /* heading printed by print_cpu_name() */ | |||
| const char *suffix; /* appended to function names in reports, see cpu_suffix() */ | |||
| int flag; /* AV_CPU_FLAG_* bits enabled by this entry */ | |||
| } cpus[] = { | |||
| #if ARCH_X86 | |||
| { "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV }, | |||
| { "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT }, | |||
| { "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW }, | |||
| { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT }, | |||
| { "SSE", "sse", AV_CPU_FLAG_SSE }, | |||
| { "SSE2", "sse2", AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW }, | |||
| { "SSE3", "sse3", AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW }, | |||
| { "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM }, | |||
| { "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 }, | |||
| { "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 }, | |||
| { "AVX", "avx", AV_CPU_FLAG_AVX }, | |||
| { "XOP", "xop", AV_CPU_FLAG_XOP }, | |||
| { "FMA3", "fma3", AV_CPU_FLAG_FMA3 }, | |||
| { "FMA4", "fma4", AV_CPU_FLAG_FMA4 }, | |||
| { "AVX2", "avx2", AV_CPU_FLAG_AVX2 }, | |||
| #endif | |||
| { NULL } | |||
| }; | |||
| typedef struct CheckasmFuncVersion { /* one registered implementation of a named function */ | |||
| struct CheckasmFuncVersion *next; /* next implementation of the same function, or NULL */ | |||
| intptr_t (*func)(); /* the implementation itself */ | |||
| int ok; /* set to 1 on registration, cleared by checkasm_fail_func() */ | |||
| int cpu; /* value of state.cpu_flag at registration time */ | |||
| int iterations; /* accumulated by checkasm_update_bench() */ | |||
| uint64_t cycles; /* accumulated by checkasm_update_bench() */ | |||
| } CheckasmFuncVersion; | |||
| /* Binary search tree node, keyed by function name and ordered by cmp_func_names() */ | |||
| typedef struct CheckasmFunc { | |||
| struct CheckasmFunc *child[2]; /* [0]: names comparing lower, [1]: names comparing higher */ | |||
| CheckasmFuncVersion versions; /* first registered version; later ones are chained through .next */ | |||
| char name[1]; /* NUL-terminated name; get_func() over-allocates the node to hold the full string */ | |||
| } CheckasmFunc; | |||
| /* Internal state shared by the test driver and the checkasm_*() callbacks */ | |||
| static struct { | |||
| CheckasmFunc *funcs; /* root of the function tree */ | |||
| CheckasmFunc *current_func; /* set by checkasm_check_func() */ | |||
| CheckasmFuncVersion *current_func_ver; /* set by checkasm_check_func() */ | |||
| const char *bench_pattern; /* NULL unless --bench was given; "" matches every function */ | |||
| int bench_pattern_len; | |||
| int num_checked; /* number of non-C versions registered so far */ | |||
| int num_failed; /* number of versions marked as failed so far */ | |||
| int nop_time; /* timing overhead as returned by measure_nop_time() */ | |||
| int cpu_flag; /* cpu flags in effect for the current pass */ | |||
| const char *cpu_flag_name; /* heading still to be printed; reset to NULL by print_cpu_name() */ | |||
| } state; | |||
| /* PRNG state, seeded once in main() and read by the tests through the rnd() macro */ | |||
| AVLFG checkasm_lfg; | |||
/* Write a printf-style message to stderr, colored when stderr appears to be
 * a color-capable terminal (or a console, on Windows). The capability probe
 * runs only on the first call; its result is cached in a static variable. */
static void color_printf(int color, const char *fmt, ...)
{
    static int use_color = -1; /* -1: not probed yet, 0: plain output, 1: colored output */
    va_list arg;

#if HAVE_SETCONSOLETEXTATTRIBUTE
    static HANDLE con;
    static WORD org_attributes;

    if (use_color < 0) {
        CONSOLE_SCREEN_BUFFER_INFO con_info;

        con       = GetStdHandle(STD_ERROR_HANDLE);
        use_color = 0;
        if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) {
            /* Remember the original attributes so they can be restored afterwards */
            org_attributes = con_info.wAttributes;
            use_color      = 1;
        }
    }
    if (use_color)
        SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f));
#else
    if (use_color < 0) {
        const char *term = getenv("TERM");

        use_color = 0;
        if (term && strcmp(term, "dumb") && isatty(2))
            use_color = 1;
    }
    if (use_color)
        fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
#endif

    va_start(arg, fmt);
    vfprintf(stderr, fmt, arg);
    va_end(arg);

    if (!use_color)
        return;

    /* Switch back to the default appearance */
#if HAVE_SETCONSOLETEXTATTRIBUTE
    SetConsoleTextAttribute(con, org_attributes);
#else
    fprintf(stderr, "\x1b[0m");
#endif
}
| /* Deallocate a tree */ | |||
| static void destroy_func_tree(CheckasmFunc *f) | |||
| { | |||
| if (f) { | |||
| CheckasmFuncVersion *v = f->versions.next; | |||
| while (v) { | |||
| CheckasmFuncVersion *next = v->next; | |||
| free(v); | |||
| v = next; | |||
| } | |||
| destroy_func_tree(f->child[0]); | |||
| destroy_func_tree(f->child[1]); | |||
| free(f); | |||
| } | |||
| } | |||
| /* Allocate a zero-initialized block, clean up and exit on failure */ | |||
| static void *checkasm_malloc(size_t size) | |||
| { | |||
| void *ptr = calloc(1, size); | |||
| if (!ptr) { | |||
| fprintf(stderr, "checkasm: malloc failed\n"); | |||
| destroy_func_tree(state.funcs); | |||
| exit(1); | |||
| } | |||
| return ptr; | |||
| } | |||
| /* Get the suffix of the specified cpu flag */ | |||
| static const char *cpu_suffix(int cpu) | |||
| { | |||
| int i = FF_ARRAY_ELEMS(cpus); | |||
| while (--i >= 0) | |||
| if (cpu & cpus[i].flag) | |||
| return cpus[i].suffix; | |||
| return "c"; | |||
| } | |||
| #ifdef AV_READ_TIME | |||
/* qsort() comparator ordering uint16_t samples in ascending order */
static int cmp_nop(const void *a, const void *b)
{
    const uint16_t lhs = *(const uint16_t *)a;
    const uint16_t rhs = *(const uint16_t *)b;

    /* Both operands fit in an int, so the difference cannot overflow */
    return (int)lhs - (int)rhs;
}
| /* Measure the overhead of the timing code (in decicycles) */ | |||
| static int measure_nop_time(void) | |||
| { | |||
| uint16_t nops[10000]; | |||
| int i, nop_sum = 0; | |||
| for (i = 0; i < 10000; i++) { | |||
| uint64_t t = AV_READ_TIME(); | |||
| nops[i] = AV_READ_TIME() - t; | |||
| } | |||
| qsort(nops, 10000, sizeof(uint16_t), cmp_nop); | |||
| for (i = 2500; i < 7500; i++) | |||
| nop_sum += nops[i]; | |||
| return nop_sum / 500; | |||
| } | |||
| /* Print benchmark results */ | |||
| static void print_benchs(CheckasmFunc *f) | |||
| { | |||
| if (f) { | |||
| print_benchs(f->child[0]); | |||
| /* Only print functions with at least one assembly version */ | |||
| if (f->versions.cpu || f->versions.next) { | |||
| CheckasmFuncVersion *v = &f->versions; | |||
| do { | |||
| if (v->iterations) { | |||
| int decicycles = (10*v->cycles/v->iterations - state.nop_time) / 4; | |||
| printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10); | |||
| } | |||
| } while ((v = v->next)); | |||
| } | |||
| print_benchs(f->child[1]); | |||
| } | |||
| } | |||
| #endif | |||
/* ASCIIbetical comparison, except that numbers keep their natural order.
 * After the first differing character, digits that both strings still have
 * in parallel are skipped; the string whose number continues longer sorts
 * later. Otherwise the difference at the first mismatch decides. */
static int cmp_func_names(const char *a, const char *b)
{
    int ascii_diff, digit_diff;

    /* Advance over the common prefix, remembering the first difference */
    for (;;) {
        ascii_diff = *a - *b;
        if (ascii_diff || !*a)
            break;
        a++;
        b++;
    }

    /* Skip positions where both strings hold a digit */
    while (av_isdigit(*a) && av_isdigit(*b)) {
        a++;
        b++;
    }

    digit_diff = av_isdigit(*a) - av_isdigit(*b);
    if (digit_diff)
        return digit_diff;
    return ascii_diff;
}
| /* Get a node with the specified name, creating it if it doesn't exist */ | |||
| static CheckasmFunc *get_func(const char *name, int length) | |||
| { | |||
| CheckasmFunc *f, **f_ptr = &state.funcs; | |||
| /* Search the tree for a matching node */ | |||
| while ((f = *f_ptr)) { | |||
| int cmp = cmp_func_names(name, f->name); | |||
| if (!cmp) | |||
| return f; | |||
| f_ptr = &f->child[(cmp > 0)]; | |||
| } | |||
| /* Allocate and insert a new node into the tree */ | |||
| f = *f_ptr = checkasm_malloc(sizeof(CheckasmFunc) + length); | |||
| memcpy(f->name, name, length+1); | |||
| return f; | |||
| } | |||
| /* Perform tests and benchmarks for the specified cpu flag if supported by the host */ | |||
| static void check_cpu_flag(const char *name, int flag) | |||
| { | |||
| int old_cpu_flag = state.cpu_flag; | |||
| flag |= old_cpu_flag; | |||
| av_set_cpu_flags_mask(flag); | |||
| state.cpu_flag = av_get_cpu_flags(); | |||
| if (!flag || state.cpu_flag != old_cpu_flag) { | |||
| int i; | |||
| state.cpu_flag_name = name; | |||
| for (i = 0; tests[i]; i++) | |||
| tests[i](); | |||
| } | |||
| } | |||
| /* Print the name of the current CPU flag, but only do it once */ | |||
| static void print_cpu_name(void) | |||
| { | |||
| if (state.cpu_flag_name) { | |||
| color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name); | |||
| state.cpu_flag_name = NULL; | |||
| } | |||
| } | |||
| int main(int argc, char *argv[]) | |||
| { | |||
| int i, seed, ret = 0; | |||
| if (!tests[0] || !cpus[0].flag) { | |||
| fprintf(stderr, "checkasm: no tests to perform\n"); | |||
| return 1; | |||
| } | |||
| if (argc > 1 && !strncmp(argv[1], "--bench", 7)) { | |||
| #ifndef AV_READ_TIME | |||
| fprintf(stderr, "checkasm: --bench is not supported on your system\n"); | |||
| return 1; | |||
| #endif | |||
| if (argv[1][7] == '=') { | |||
| state.bench_pattern = argv[1] + 8; | |||
| state.bench_pattern_len = strlen(state.bench_pattern); | |||
| } else | |||
| state.bench_pattern = ""; | |||
| argc--; | |||
| argv++; | |||
| } | |||
| seed = (argc > 1) ? atoi(argv[1]) : av_get_random_seed(); | |||
| fprintf(stderr, "checkasm: using random seed %u\n", seed); | |||
| av_lfg_init(&checkasm_lfg, seed); | |||
| check_cpu_flag(NULL, 0); | |||
| for (i = 0; cpus[i].flag; i++) | |||
| check_cpu_flag(cpus[i].name, cpus[i].flag); | |||
| if (state.num_failed) { | |||
| fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked); | |||
| ret = 1; | |||
| } else { | |||
| fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked); | |||
| #ifdef AV_READ_TIME | |||
| if (state.bench_pattern) { | |||
| state.nop_time = measure_nop_time(); | |||
| printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10); | |||
| print_benchs(state.funcs); | |||
| } | |||
| #endif | |||
| } | |||
| destroy_func_tree(state.funcs); | |||
| return ret; | |||
| } | |||
| /* Decide whether or not the specified function needs to be tested and | |||
| * allocate/initialize data structures if needed. Returns a pointer to a | |||
| * reference function if the function should be tested, otherwise NULL */ | |||
| intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))() | |||
| { | |||
| char name_buf[256]; | |||
| intptr_t (*ref)() = func; | |||
| CheckasmFuncVersion *v; | |||
| int name_length; | |||
| va_list arg; | |||
| va_start(arg, name); | |||
| name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg); | |||
| va_end(arg); | |||
| if (!func || name_length <= 0 || name_length >= sizeof(name_buf)) | |||
| return NULL; | |||
| state.current_func = get_func(name_buf, name_length); | |||
| v = &state.current_func->versions; | |||
| if (v->func) { | |||
| CheckasmFuncVersion *prev; | |||
| do { | |||
| /* Only test functions that haven't already been tested */ | |||
| if (v->func == func) | |||
| return NULL; | |||
| if (v->ok) | |||
| ref = v->func; | |||
| prev = v; | |||
| } while ((v = v->next)); | |||
| v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion)); | |||
| } | |||
| v->func = func; | |||
| v->ok = 1; | |||
| v->cpu = state.cpu_flag; | |||
| state.current_func_ver = v; | |||
| if (state.cpu_flag) | |||
| state.num_checked++; | |||
| return ref; | |||
| } | |||
| /* Decide whether or not the current function needs to be benchmarked */ | |||
| int checkasm_bench_func(void) | |||
| { | |||
| return !state.num_failed && state.bench_pattern && | |||
| !strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len); | |||
| } | |||
| /* Indicate that the current test has failed */ | |||
| void checkasm_fail_func(const char *msg, ...) | |||
| { | |||
| if (state.current_func_ver->cpu && state.current_func_ver->ok) { | |||
| va_list arg; | |||
| print_cpu_name(); | |||
| fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu)); | |||
| va_start(arg, msg); | |||
| vfprintf(stderr, msg, arg); | |||
| va_end(arg); | |||
| fprintf(stderr, ")\n"); | |||
| state.current_func_ver->ok = 0; | |||
| state.num_failed++; | |||
| } | |||
| } | |||
| /* Update benchmark results of the current function */ | |||
| void checkasm_update_bench(int iterations, uint64_t cycles) | |||
| { | |||
| state.current_func_ver->iterations += iterations; | |||
| state.current_func_ver->cycles += cycles; | |||
| } | |||
| /* Print the outcome of all tests performed since the last time this function was called */ | |||
| void checkasm_report(const char *name, ...) | |||
| { | |||
| static int prev_checked, prev_failed, max_length; | |||
| if (state.num_checked > prev_checked) { | |||
| print_cpu_name(); | |||
| if (*name) { | |||
| int pad_length = max_length; | |||
| va_list arg; | |||
| fprintf(stderr, " - "); | |||
| va_start(arg, name); | |||
| pad_length -= vfprintf(stderr, name, arg); | |||
| va_end(arg); | |||
| fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '['); | |||
| } else | |||
| fprintf(stderr, " - %-*s [", max_length, state.current_func->name); | |||
| if (state.num_failed == prev_failed) | |||
| color_printf(COLOR_GREEN, "OK"); | |||
| else | |||
| color_printf(COLOR_RED, "FAILED"); | |||
| fprintf(stderr, "]\n"); | |||
| prev_checked = state.num_checked; | |||
| prev_failed = state.num_failed; | |||
| } else if (!state.cpu_flag) { | |||
| int length; | |||
| /* Calculate the amount of padding required to make the output vertically aligned */ | |||
| if (*name) { | |||
| va_list arg; | |||
| va_start(arg, name); | |||
| length = vsnprintf(NULL, 0, name, arg); | |||
| va_end(arg); | |||
| } else | |||
| length = strlen(state.current_func->name); | |||
| if (length > max_length) | |||
| max_length = length; | |||
| } | |||
| } | |||
| @@ -0,0 +1,115 @@ | |||
| /* | |||
| * Assembly testing and benchmarking tool | |||
| * Copyright (c) 2015 Henrik Gramner | |||
| * Copyright (c) 2008 Loren Merritt | |||
| * | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or modify | |||
| * it under the terms of the GNU General Public License as published by | |||
| * the Free Software Foundation; either version 2 of the License, or | |||
| * (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
| * GNU General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU General Public License along | |||
| * with Libav; if not, write to the Free Software Foundation, Inc., | |||
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |||
| */ | |||
| #ifndef CHECKASM_H | |||
| #define CHECKASM_H | |||
| #include <stdint.h> | |||
| #include "config.h" | |||
| #include "libavutil/avstring.h" | |||
| #include "libavutil/lfg.h" | |||
| #include "libavutil/timer.h" | |||
| void checkasm_check_h264pred(void); | |||
| intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))() av_printf_format(2, 3); | |||
| int checkasm_bench_func(void); | |||
| void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2); | |||
| void checkasm_update_bench(int iterations, uint64_t cycles); | |||
| void checkasm_report(const char *name, ...) av_printf_format(1, 2); | |||
| extern AVLFG checkasm_lfg; | |||
| #define rnd() av_lfg_get(&checkasm_lfg) | |||
| static av_unused intptr_t (*func_ref)(); | |||
| static av_unused intptr_t (*func_new)(); | |||
| #define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */ | |||
| /* Decide whether or not the specified function needs to be tested */ | |||
| #define check_func(func, ...) ((func_new = (intptr_t (*)())func) &&\ | |||
| (func_ref = checkasm_check_func(func_new, __VA_ARGS__))) | |||
| /* Indicate that the current test has failed */ | |||
| #define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__) | |||
| /* Print the test outcome */ | |||
| #define report(...) checkasm_report("" __VA_ARGS__) | |||
| /* Call the reference function */ | |||
| #define call_ref(...) func_ref(__VA_ARGS__) | |||
| #if ARCH_X86 && HAVE_YASM | |||
| /* Verifies that clobbered callee-saved registers are properly saved and restored */ | |||
| intptr_t checkasm_checked_call(intptr_t (*func)(), ...); | |||
| #endif | |||
| /* Call the function */ | |||
| #if ARCH_X86_64 && HAVE_YASM | |||
| /* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit. | |||
| * This is done by clobbering the stack with junk around the stack pointer and calling the | |||
| * assembly function through checkasm_checked_call with added dummy arguments which forces all | |||
| * real arguments to be passed on the stack and not in registers. For 32-bit arguments the | |||
| * upper half of the 64-bit register locations on the stack will now contain junk which will | |||
| * cause misbehaving functions to either produce incorrect output or segfault. Note that | |||
| * even though this works extremely well in practice, it's technically not guaranteed | |||
| * and false negatives are theoretically possible, but there can never be any false positives. | |||
| */ | |||
| void checkasm_stack_clobber(uint64_t clobber, ...); | |||
| #define CLOB (UINT64_C(0xdeadbeefdeadbeef)) | |||
| #define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\ | |||
| CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\ | |||
| checkasm_checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__)) | |||
| #elif ARCH_X86_32 && HAVE_YASM | |||
| #define call_new(...) checkasm_checked_call(func_new, __VA_ARGS__) | |||
| #else | |||
| #define call_new(...) func_new(__VA_ARGS__) | |||
| #endif | |||
| /* Benchmark the function */ | |||
| #ifdef AV_READ_TIME | |||
| #define bench_new(...)\ | |||
| do {\ | |||
| if (checkasm_bench_func()) {\ | |||
| intptr_t (*tfunc)() = func_new;\ | |||
| uint64_t tsum = 0;\ | |||
| int ti, tcount = 0;\ | |||
| for (ti = 0; ti < BENCH_RUNS; ti++) {\ | |||
| uint64_t t = AV_READ_TIME();\ | |||
| tfunc(__VA_ARGS__);\ | |||
| tfunc(__VA_ARGS__);\ | |||
| tfunc(__VA_ARGS__);\ | |||
| tfunc(__VA_ARGS__);\ | |||
| t = AV_READ_TIME() - t;\ | |||
| if (t*tcount <= tsum*4 && ti > 0) {\ | |||
| tsum += t;\ | |||
| tcount++;\ | |||
| }\ | |||
| }\ | |||
| checkasm_update_bench(tcount, tsum);\ | |||
| }\ | |||
| } while (0) | |||
| #else | |||
| #define bench_new(...) | |||
| #endif | |||
| #endif | |||
| @@ -0,0 +1,252 @@ | |||
| /* | |||
| * Copyright (c) 2015 Henrik Gramner | |||
| * | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or modify | |||
| * it under the terms of the GNU General Public License as published by | |||
| * the Free Software Foundation; either version 2 of the License, or | |||
| * (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
| * GNU General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU General Public License along | |||
| * with Libav; if not, write to the Free Software Foundation, Inc., | |||
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |||
| */ | |||
| #include <string.h> | |||
| #include "checkasm.h" | |||
| #include "libavcodec/avcodec.h" | |||
| #include "libavcodec/h264pred.h" | |||
| #include "libavutil/common.h" | |||
| #include "libavutil/intreadwrite.h" | |||
| static const int codec_ids[4] = { AV_CODEC_ID_H264, AV_CODEC_ID_VP8, AV_CODEC_ID_RV40, AV_CODEC_ID_SVQ3 }; | |||
| static const char * const pred4x4_modes[4][15] = { | |||
| { /* H264 */ | |||
| [VERT_PRED ] = "vertical", | |||
| [HOR_PRED ] = "horizontal", | |||
| [DC_PRED ] = "dc", | |||
| [DIAG_DOWN_LEFT_PRED ] = "down_left", | |||
| [DIAG_DOWN_RIGHT_PRED] = "down_right", | |||
| [VERT_RIGHT_PRED ] = "vertical_right", | |||
| [HOR_DOWN_PRED ] = "horizontal_right", | |||
| [VERT_LEFT_PRED ] = "vertical_left", | |||
| [HOR_UP_PRED ] = "horizontal_up", | |||
| [LEFT_DC_PRED ] = "left_dc", | |||
| [TOP_DC_PRED ] = "top_dc", | |||
| [DC_128_PRED ] = "dc_128", | |||
| }, | |||
| { /* VP8 */ | |||
| [VERT_PRED ] = "vertical_vp8", | |||
| [HOR_PRED ] = "horizontal_vp8", | |||
| [VERT_LEFT_PRED] = "vertical_left_vp8", | |||
| [TM_VP8_PRED ] = "tm_vp8", | |||
| [DC_127_PRED ] = "dc_127_vp8", | |||
| [DC_129_PRED ] = "dc_129_vp8", | |||
| }, | |||
| { /* RV40 */ | |||
| [DIAG_DOWN_LEFT_PRED ] = "down_left_rv40", | |||
| [VERT_LEFT_PRED ] = "vertical_left_rv40", | |||
| [HOR_UP_PRED ] = "horizontal_up_rv40", | |||
| [DIAG_DOWN_LEFT_PRED_RV40_NODOWN] = "down_left_nodown_rv40", | |||
| [HOR_UP_PRED_RV40_NODOWN ] = "horizontal_up_nodown_rv40", | |||
| [VERT_LEFT_PRED_RV40_NODOWN ] = "vertical_left_nodown_rv40", | |||
| }, | |||
| { /* SVQ3 */ | |||
| [DIAG_DOWN_LEFT_PRED] = "down_left_svq3", | |||
| }, | |||
| }; | |||
| static const char * const pred8x8_modes[4][11] = { | |||
| { /* H264 */ | |||
| [DC_PRED8x8 ] = "dc", | |||
| [HOR_PRED8x8 ] = "horizontal", | |||
| [VERT_PRED8x8 ] = "vertical", | |||
| [PLANE_PRED8x8 ] = "plane", | |||
| [LEFT_DC_PRED8x8 ] = "left_dc", | |||
| [TOP_DC_PRED8x8 ] = "top_dc", | |||
| [DC_128_PRED8x8 ] = "dc_128", | |||
| [ALZHEIMER_DC_L0T_PRED8x8] = "mad_cow_dc_l0t", | |||
| [ALZHEIMER_DC_0LT_PRED8x8] = "mad_cow_dc_0lt", | |||
| [ALZHEIMER_DC_L00_PRED8x8] = "mad_cow_dc_l00", | |||
| [ALZHEIMER_DC_0L0_PRED8x8] = "mad_cow_dc_0l0", | |||
| }, | |||
| { /* VP8 */ | |||
| [PLANE_PRED8x8 ] = "tm_vp8", | |||
| [DC_127_PRED8x8] = "dc_127_vp8", | |||
| [DC_129_PRED8x8] = "dc_129_vp8", | |||
| }, | |||
| { /* RV40 */ | |||
| [DC_PRED8x8 ] = "dc_rv40", | |||
| [LEFT_DC_PRED8x8] = "left_dc_rv40", | |||
| [TOP_DC_PRED8x8 ] = "top_dc_rv40", | |||
| }, | |||
| { /* SVQ3 */ | |||
| }, | |||
| }; | |||
| static const char * const pred16x16_modes[4][9] = { | |||
| { /* H264 */ | |||
| [DC_PRED8x8 ] = "dc", | |||
| [HOR_PRED8x8 ] = "horizontal", | |||
| [VERT_PRED8x8 ] = "vertical", | |||
| [PLANE_PRED8x8 ] = "plane", | |||
| [LEFT_DC_PRED8x8] = "left_dc", | |||
| [TOP_DC_PRED8x8 ] = "top_dc", | |||
| [DC_128_PRED8x8 ] = "dc_128", | |||
| }, | |||
| { /* VP8 */ | |||
| [PLANE_PRED8x8 ] = "tm_vp8", | |||
| [DC_127_PRED8x8] = "dc_127_vp8", | |||
| [DC_129_PRED8x8] = "dc_129_vp8", | |||
| }, | |||
| { /* RV40 */ | |||
| [PLANE_PRED8x8] = "plane_rv40", | |||
| }, | |||
| { /* SVQ3 */ | |||
| [PLANE_PRED8x8] = "plane_svq3", | |||
| }, | |||
| }; | |||
| static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff }; | |||
| #define SIZEOF_PIXEL ((bit_depth + 7) / 8) | |||
| #define BUF_SIZE (3*16*17) | |||
| #define check_pred_func(func, name, mode_name)\ | |||
| (mode_name && ((codec_ids[codec] == AV_CODEC_ID_H264) ?\ | |||
| check_func(func, "pred%s_%s_%d", name, mode_name, bit_depth) :\ | |||
| check_func(func, "pred%s_%s", name, mode_name))) | |||
| #define randomize_buffers()\ | |||
| do {\ | |||
| uint32_t mask = pixel_mask[bit_depth-8];\ | |||
| int i;\ | |||
| for (i = 0; i < BUF_SIZE; i += 4) {\ | |||
| uint32_t r = rnd() & mask;\ | |||
| AV_WN32A(buf0+i, r);\ | |||
| AV_WN32A(buf1+i, r);\ | |||
| }\ | |||
| } while (0) | |||
| #define src0 (buf0 + 4*16) /* Offset to allow room for top and left */ | |||
| #define src1 (buf1 + 4*16) | |||
| static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, | |||
| int codec, int chroma_format, int bit_depth) | |||
| { | |||
| if (chroma_format == 1) { | |||
| uint8_t *topright = buf0 + 2*16; | |||
| int pred_mode; | |||
| for (pred_mode = 0; pred_mode < 15; pred_mode++) { | |||
| if (check_pred_func(h->pred4x4[pred_mode], "4x4", pred4x4_modes[codec][pred_mode])) { | |||
| randomize_buffers(); | |||
| call_ref(src0, topright, (ptrdiff_t)12*SIZEOF_PIXEL); | |||
| call_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL); | |||
| if (memcmp(buf0, buf1, BUF_SIZE)) | |||
| fail(); | |||
| bench_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, | |||
| int codec, int chroma_format, int bit_depth) | |||
| { | |||
| int pred_mode; | |||
| for (pred_mode = 0; pred_mode < 11; pred_mode++) { | |||
| if (check_pred_func(h->pred8x8[pred_mode], (chroma_format == 2) ? "8x16" : "8x8", | |||
| pred8x8_modes[codec][pred_mode])) { | |||
| randomize_buffers(); | |||
| call_ref(src0, (ptrdiff_t)24*SIZEOF_PIXEL); | |||
| call_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL); | |||
| if (memcmp(buf0, buf1, BUF_SIZE)) | |||
| fail(); | |||
| bench_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL); | |||
| } | |||
| } | |||
| } | |||
| static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, | |||
| int codec, int chroma_format, int bit_depth) | |||
| { | |||
| if (chroma_format == 1) { | |||
| int pred_mode; | |||
| for (pred_mode = 0; pred_mode < 9; pred_mode++) { | |||
| if (check_pred_func(h->pred16x16[pred_mode], "16x16", pred16x16_modes[codec][pred_mode])) { | |||
| randomize_buffers(); | |||
| call_ref(src0, (ptrdiff_t)48); | |||
| call_new(src1, (ptrdiff_t)48); | |||
| if (memcmp(buf0, buf1, BUF_SIZE)) | |||
| fail(); | |||
| bench_new(src1, (ptrdiff_t)48); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, | |||
| int codec, int chroma_format, int bit_depth) | |||
| { | |||
| if (chroma_format == 1 && codec_ids[codec] == AV_CODEC_ID_H264) { | |||
| int pred_mode; | |||
| for (pred_mode = 0; pred_mode < 12; pred_mode++) { | |||
| if (check_pred_func(h->pred8x8l[pred_mode], "8x8l", pred4x4_modes[codec][pred_mode])) { | |||
| int neighbors; | |||
| for (neighbors = 0; neighbors <= 0xc000; neighbors += 0x4000) { | |||
| int has_topleft = neighbors & 0x8000; | |||
| int has_topright = neighbors & 0x4000; | |||
| if ((pred_mode == DIAG_DOWN_RIGHT_PRED || pred_mode == VERT_RIGHT_PRED) && !has_topleft) | |||
| continue; /* Those aren't allowed according to the spec */ | |||
| randomize_buffers(); | |||
| call_ref(src0, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL); | |||
| call_new(src1, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL); | |||
| if (memcmp(buf0, buf1, BUF_SIZE)) | |||
| fail(); | |||
| bench_new(src1, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| /* TODO: Add tests for H.264 lossless H/V prediction */ | |||
| void checkasm_check_h264pred(void) | |||
| { | |||
| static const struct { | |||
| void (*func)(H264PredContext*, uint8_t*, uint8_t*, int, int, int); | |||
| const char *name; | |||
| } tests[] = { | |||
| { check_pred4x4, "pred4x4" }, | |||
| { check_pred8x8, "pred8x8" }, | |||
| { check_pred16x16, "pred16x16" }, | |||
| { check_pred8x8l, "pred8x8l" }, | |||
| }; | |||
| DECLARE_ALIGNED(16, uint8_t, buf0)[BUF_SIZE]; | |||
| DECLARE_ALIGNED(16, uint8_t, buf1)[BUF_SIZE]; | |||
| H264PredContext h; | |||
| int test, codec, chroma_format, bit_depth; | |||
| for (test = 0; test < FF_ARRAY_ELEMS(tests); test++) { | |||
| for (codec = 0; codec < 4; codec++) { | |||
| int codec_id = codec_ids[codec]; | |||
| for (bit_depth = 8; bit_depth <= (codec_id == AV_CODEC_ID_H264 ? 10 : 8); bit_depth++) | |||
| for (chroma_format = 1; chroma_format <= (codec_id == AV_CODEC_ID_H264 ? 2 : 1); chroma_format++) { | |||
| ff_h264_pred_init(&h, codec_id, bit_depth, chroma_format); | |||
| tests[test].func(&h, buf0, buf1, codec, chroma_format, bit_depth); | |||
| } | |||
| } | |||
| report("%s", tests[test].name); | |||
| } | |||
| } | |||
| @@ -0,0 +1,6 @@ | |||
| CHECKASMOBJS-$(HAVE_YASM) += x86/checkasm.o | |||
| tests/checkasm/x86/%.o: tests/checkasm/x86/%.asm | |||
| $(DEPYASM) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d) | |||
| $(YASM) $(YASMFLAGS) -I $(<D)/ -o $@ $< | |||
| -$(STRIP) $(STRIPFLAGS) $@ | |||
| @@ -0,0 +1,193 @@ | |||
| ;***************************************************************************** | |||
| ;* Assembly testing and benchmarking tool | |||
| ;* Copyright (c) 2008 Loren Merritt | |||
| ;* Copyright (c) 2012 Henrik Gramner | |||
| ;* | |||
| ;* This file is part of Libav. | |||
| ;* | |||
| ;* Libav is free software; you can redistribute it and/or modify | |||
| ;* it under the terms of the GNU General Public License as published by | |||
| ;* the Free Software Foundation; either version 2 of the License, or | |||
| ;* (at your option) any later version. | |||
| ;* | |||
| ;* Libav is distributed in the hope that it will be useful, | |||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
| ;* GNU General Public License for more details. | |||
| ;* | |||
| ;* You should have received a copy of the GNU General Public License | |||
| ;* along with this program; if not, write to the Free Software | |||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |||
| ;***************************************************************************** | |||
| ; Functions declared with cglobal below are exported with this prefix | |||
| ; (e.g. cglobal stack_clobber -> checkasm_stack_clobber). | |||
| %define private_prefix checkasm | |||
| %include "libavutil/x86/x86inc.asm" | |||
| SECTION_RODATA | |||
| ; NUL-terminated message handed to fail_func when a register check fails. | |||
| error_message: db "failed to preserve register", 0 | |||
| %if ARCH_X86_64 | |||
| ; just random numbers to reduce the chance of incidental match | |||
| ; x6-x15: 128-bit patterns. On WIN64, checked_call loads them into m6-m15 | |||
| ; with mova before the call and xors them back in afterwards; presumably the | |||
| ; 16-byte alignment is there for those mova accesses. | |||
| ALIGN 16 | |||
| x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064 | |||
| x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636 | |||
| x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e | |||
| x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f | |||
| x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9 | |||
| x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d | |||
| x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b | |||
| x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786 | |||
| x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef | |||
| x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5 | |||
| ; n7-n14: 64-bit patterns for the general-purpose registers r7-r14. | |||
| ; checked_call uses them from n14 downwards, (15-free_regs) of them, so | |||
| ; n7 and n8 are only referenced when free_regs is 7 (WIN64). | |||
| n7: dq 0x21f86d66c8ca00ce | |||
| n8: dq 0x75b6ba21077c48ad | |||
| n9: dq 0xed56bb2dcb3c7736 | |||
| n10: dq 0x8bda43d3fd1a7e06 | |||
| n11: dq 0xb64a9c9e5d318408 | |||
| n12: dq 0xdf9a54b303f1d3a3 | |||
| n13: dq 0x4a75479abd64e097 | |||
| n14: dq 0x249214109d5d1c88 | |||
| %endif | |||
| SECTION .text | |||
| ; Failure-reporting routine defined outside this file. checked_call calls it | |||
| ; with a pointer to error_message when a register check fails. | |||
| cextern fail_func | |||
| ; max number of args used by any asm function. | |||
| ; (max_args % 4) must equal 3 for stack alignment | |||
| ; Used below to size the stack areas and the argument-copy loops. | |||
| %define max_args 15 | |||
| %if ARCH_X86_64 | |||
| ;----------------------------------------------------------------------------- | |||
| ; int checkasm_stack_clobber(uint64_t clobber, ...) | |||
| ;----------------------------------------------------------------------------- | |||
| ; Fills (max_args+6) 8-byte stack slots below the caller's stack pointer with | |||
| ; the value of the first argument (r0). Only that first argument is read. | |||
| cglobal stack_clobber, 1,2 | |||
| ; Clobber the stack with junk below the stack pointer | |||
| %define size (max_args+6)*8 | |||
| SUB rsp, size | |||
| ; r1 = byte offset of the slot being written; starts at the highest slot. | |||
| mov r1, size-8 | |||
| .loop: | |||
| mov [rsp+r1], r0 | |||
| sub r1, 8 | |||
| ; Keep going while the offset is still >= 0, so slot 0 is written too. | |||
| jge .loop | |||
| ADD rsp, size | |||
| RET | |||
| ; free_regs = number of low-numbered general-purpose registers that | |||
| ; checked_call does not verify: it loads and checks r14 down to r<free_regs>, | |||
| ; i.e. r7-r14 on WIN64 and r9-r14 otherwise. | |||
| ; NOTE(review): presumably this matches the count of volatile (caller-saved) | |||
| ; registers in x86inc's numbering for each ABI - confirm against x86inc.asm. | |||
| %if WIN64 | |||
| %assign free_regs 7 | |||
| %else | |||
| %assign free_regs 9 | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; intptr_t checkasm_checked_call(intptr_t (*func)(), ...) | |||
| ;----------------------------------------------------------------------------- | |||
| ; x86-64 variant. Calls func with the arguments found in argument slots 6 and | |||
| ; up, after filling r<free_regs>..r14 (and m6-m15 on WIN64) with known | |||
| ; patterns. After the call the patterns are compared; on any mismatch | |||
| ; fail_func(error_message) is called. func's rax is returned either way. | |||
| INIT_XMM | |||
| ; cglobal arguments (x86inc order - TODO confirm): 2 args loaded, 15 GPRs, | |||
| ; 16 vector registers, max_args*8+8 bytes of stack for the outgoing arguments. | |||
| cglobal checked_call, 2,15,16,max_args*8+8 | |||
| ; Keep the function pointer in r6; r0 is about to be reloaded with func's | |||
| ; first real argument. | |||
| mov r6, r0 | |||
| ; All arguments have been pushed on the stack instead of registers in order to | |||
| ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit. | |||
| mov r0, r6mp | |||
| mov r1, r7mp | |||
| mov r2, r8mp | |||
| mov r3, r9mp | |||
| %if UNIX64 | |||
| mov r4, r10mp | |||
| mov r5, r11mp | |||
| ; func's arguments 6..max_args-1 are copied from the incoming stack area to | |||
| ; the bottom of the local stack area, starting at [rsp]. r9 is a scratch | |||
| ; register here; it is loaded with its check pattern further down. | |||
| %assign i 6 | |||
| %rep max_args-6 | |||
| mov r9, [rsp+stack_offset+(i+1)*8] | |||
| mov [rsp+(i-6)*8], r9 | |||
| %assign i i+1 | |||
| %endrep | |||
| %else | |||
| ; func's arguments 4..max_args-1 are copied to [rsp+i*8]; the first four | |||
| ; 8-byte slots are left unwritten (presumably the WIN64 shadow space for the | |||
| ; register arguments - confirm). r9 is func's argument 3 on WIN64, so the | |||
| ; scratch use below overwrites it. | |||
| %assign i 4 | |||
| %rep max_args-4 | |||
| mov r9, [rsp+stack_offset+(i+7)*8] | |||
| mov [rsp+i*8], r9 | |||
| %assign i i+1 | |||
| %endrep | |||
| %endif | |||
| %if WIN64 | |||
| ; Load the patterns x6-x15 into m6-m15. | |||
| %assign i 6 | |||
| %rep 16-6 | |||
| mova m %+ i, [x %+ i] | |||
| %assign i i+1 | |||
| %endrep | |||
| %endif | |||
| ; Load the patterns n14, n13, ... into r14 down to r<free_regs>. | |||
| %assign i 14 | |||
| %rep 15-free_regs | |||
| mov r %+ i, [n %+ i] | |||
| %assign i i-1 | |||
| %endrep | |||
| call r6 | |||
| ; xor each checked register with its pattern (zero if preserved) and | |||
| ; accumulate the results in r14. The final "or" leaves ZF set only if every | |||
| ; checked register still held its pattern. | |||
| %assign i 14 | |||
| %rep 15-free_regs | |||
| xor r %+ i, [n %+ i] | |||
| or r14, r %+ i | |||
| %assign i i-1 | |||
| %endrep | |||
| %if WIN64 | |||
| ; Same check for m6-m15, accumulated in m6. | |||
| %assign i 6 | |||
| %rep 16-6 | |||
| pxor m %+ i, [x %+ i] | |||
| por m6, m %+ i | |||
| %assign i i+1 | |||
| %endrep | |||
| ; Squeeze the 128-bit difference mask into its low 64 bits (a non-zero word | |||
| ; packs to a non-zero byte) and merge it into r14; this last "or" sets the | |||
| ; flags tested by the jz below. | |||
| packsswb m6, m6 | |||
| movq r5, m6 | |||
| or r14, r5 | |||
| %endif | |||
| jz .ok | |||
| ; A register was not preserved: report it, keeping func's return value (rax) | |||
| ; in r9 across the call to fail_func. | |||
| ; NOTE(review): r9 is not a callee-saved register in the x86-64 ABIs, so this | |||
| ; relies on fail_func leaving it untouched - confirm. | |||
| mov r9, rax | |||
| lea r0, [error_message] | |||
| call fail_func | |||
| mov rax, r9 | |||
| .ok: | |||
| RET | |||
| %else | |||
| ; just random numbers to reduce the chance of incidental match | |||
| ; n3-n6: 32-bit immediates loaded into r3-r6 before the call and xor-ed back | |||
| ; in afterwards. | |||
| %define n3 dword 0x6549315c | |||
| %define n4 dword 0xe02f3e23 | |||
| %define n5 dword 0xb78d0d1d | |||
| %define n6 dword 0x33627ba7 | |||
| ;----------------------------------------------------------------------------- | |||
| ; intptr_t checkasm_checked_call(intptr_t (*func)(), ...) | |||
| ;----------------------------------------------------------------------------- | |||
| ; x86-32 variant. Calls func with a copy of the max_args arguments that follow | |||
| ; it, after filling r3-r6 with known patterns. If any of them changed across | |||
| ; the call, fail_func(error_message) is called. func's eax is returned. | |||
| cglobal checked_call, 1,7 | |||
| mov r3, n3 | |||
| mov r4, n4 | |||
| mov r5, n5 | |||
| mov r6, n6 | |||
| ; Copy the max_args arguments after func onto the stack, last one first. | |||
| ; Each PUSH lowers esp by 4, so the same operand expression addresses the | |||
| ; previous argument on the next repetition. The 20 presumably covers the | |||
| ; return address plus the registers saved by the prologue - confirm against | |||
| ; x86inc.asm. | |||
| %rep max_args | |||
| PUSH dword [esp+20+max_args*4] | |||
| %endrep | |||
| call r0 | |||
| ; xor each register with its pattern (zero if preserved) and or the results | |||
| ; together; ZF after the final "or" means all four were preserved. | |||
| xor r3, n3 | |||
| xor r4, n4 | |||
| xor r5, n5 | |||
| xor r6, n6 | |||
| or r3, r4 | |||
| or r5, r6 | |||
| or r3, r5 | |||
| jz .ok | |||
| ; A register was not preserved: keep func's return value in r3 and pass | |||
| ; error_message to fail_func in the top stack slot, which overwrites the | |||
| ; first copied argument. | |||
| mov r3, eax | |||
| lea r0, [error_message] | |||
| mov [esp], r0 | |||
| call fail_func | |||
| mov eax, r3 | |||
| .ok: | |||
| ; Drop the argument copies pushed above. | |||
| add esp, max_args*4 | |||
| REP_RET | |||
| %endif ; ARCH_X86_64 | |||