2 * Assembly testing and benchmarking tool
3 * Copyright (c) 2015 Henrik Gramner
4 * Copyright (c) 2008 Loren Merritt
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 #include "libavutil/common.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/random_seed.h"
33 #include "libavutil/x86/cpu.h"
36 #if HAVE_SETCONSOLETEXTATTRIBUTE
38 #define COLOR_RED FOREGROUND_RED
39 #define COLOR_GREEN FOREGROUND_GREEN
40 #define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
44 #define COLOR_YELLOW 3
/* List of tests to invoke.
 * The NULL entry terminates the list; consumers iterate until they reach it
 * and treat a NULL first entry as "nothing to run". */
static void (* const tests[])(void) = {
#if CONFIG_H264PRED
    checkasm_check_h264pred,
#endif
    NULL
};
/* List of cpu flags to check.
 * name   - label printed when tests for this flag start
 * suffix - appended to function names in reports ("<func>_<suffix>")
 * flag   - AV_CPU_FLAG_* bits enabled for this step
 * The zeroed last entry terminates the list (its flag is 0). */
static const struct {
    const char *name;
    const char *suffix;
    int flag;
} cpus[] = {
#if ARCH_X86
    { "MMX",      "mmx",      AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
    { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
    { "3DNOW",    "3dnow",    AV_CPU_FLAG_3DNOW },
    { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
    { "SSE",      "sse",      AV_CPU_FLAG_SSE },
    { "SSE2",     "sse2",     AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
    { "SSE3",     "sse3",     AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
    { "SSSE3",    "ssse3",    AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
    { "SSE4.1",   "sse4",     AV_CPU_FLAG_SSE4 },
    { "SSE4.2",   "sse42",    AV_CPU_FLAG_SSE42 },
    { "AVX",      "avx",      AV_CPU_FLAG_AVX },
    { "XOP",      "xop",      AV_CPU_FLAG_XOP },
    { "FMA3",     "fma3",     AV_CPU_FLAG_FMA3 },
    { "FMA4",     "fma4",     AV_CPU_FLAG_FMA4 },
    { "AVX2",     "avx2",     AV_CPU_FLAG_AVX2 },
#endif
    { NULL }
};
/* One implementation of a tested function; versions of the same function
 * are chained through 'next'. */
typedef struct CheckasmFuncVersion {
    struct CheckasmFuncVersion *next;
    intptr_t (*func)();   /* implementation under test */
    int ok;               /* cleared once this version has failed a test */
    int cpu;              /* cpu flags active when it was registered (0 = none) */
    int iterations;       /* accumulated benchmark iterations */
    uint64_t cycles;      /* accumulated benchmark cycles */
} CheckasmFuncVersion;
98 /* Binary search tree node */
99 typedef struct CheckasmFunc {
100 struct CheckasmFunc *child[2];
101 CheckasmFuncVersion versions;
108 CheckasmFunc *current_func;
109 CheckasmFuncVersion *current_func_ver;
110 const char *bench_pattern;
111 int bench_pattern_len;
116 const char *cpu_flag_name;
/* Print colored text to stderr if the terminal supports it.
 *
 * color - one of the COLOR_* values
 * fmt   - printf-style format, followed by its arguments
 *
 * Color support is probed once on the first call and cached in use_color
 * (-1 = not probed yet, 0 = plain output, 1 = colored output). */
static void color_printf(int color, const char *fmt, ...)
{
    static int use_color = -1;
    va_list arg;

#if HAVE_SETCONSOLETEXTATTRIBUTE
    static HANDLE con;
    static WORD org_attributes;

    if (use_color < 0) {
        CONSOLE_SCREEN_BUFFER_INFO con_info;
        con = GetStdHandle(STD_ERROR_HANDLE);
        if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) {
            /* Remember the attributes so they can be restored after printing */
            org_attributes = con_info.wAttributes;
            use_color = 1;
        } else
            use_color = 0;
    }
    if (use_color)
        /* Replace only the low four (foreground) bits */
        SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f));
#else
    if (use_color < 0) {
        const char *term = getenv("TERM");
        use_color = term && strcmp(term, "dumb") && isatty(2);
    }
    if (use_color)
        /* Bit 3 of color selects the bold attribute, bits 0-2 the ANSI color */
        fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
#endif

    va_start(arg, fmt);
    vfprintf(stderr, fmt, arg);
    va_end(arg);

    if (use_color) {
#if HAVE_SETCONSOLETEXTATTRIBUTE
        SetConsoleTextAttribute(con, org_attributes);
#else
        fprintf(stderr, "\x1b[0m");
#endif
    }
}
165 /* Deallocate a tree */
166 static void destroy_func_tree(CheckasmFunc *f)
169 CheckasmFuncVersion *v = f->versions.next;
171 CheckasmFuncVersion *next = v->next;
176 destroy_func_tree(f->child[0]);
177 destroy_func_tree(f->child[1]);
182 /* Allocate a zero-initialized block, clean up and exit on failure */
183 static void *checkasm_malloc(size_t size)
185 void *ptr = calloc(1, size);
187 fprintf(stderr, "checkasm: malloc failed\n");
188 destroy_func_tree(state.funcs);
194 /* Get the suffix of the specified cpu flag */
195 static const char *cpu_suffix(int cpu)
197 int i = FF_ARRAY_ELEMS(cpus);
200 if (cpu & cpus[i].flag)
201 return cpus[i].suffix;
/* qsort() comparator ordering uint16_t samples ascending.
 * Both operands are promoted to int before the subtraction, so the
 * difference cannot overflow. */
static int cmp_nop(const void *a, const void *b)
{
    return *(const uint16_t*)a - *(const uint16_t*)b;
}
#ifdef AV_READ_TIME
/* Measure the overhead of the timing code (in decicycles).
 * Times NOP_SAMPLES back-to-back timer reads, sorts the deltas and averages
 * the middle half so that outliers at either end are ignored. Only built
 * when the platform provides AV_READ_TIME. */
static int measure_nop_time(void)
{
    enum {
        NOP_SAMPLES = 10000,
        NOP_FIRST   = NOP_SAMPLES / 4,      /* 2500 */
        NOP_LAST    = 3 * NOP_SAMPLES / 4   /* 7500 */
    };
    uint16_t nops[NOP_SAMPLES];
    int i, nop_sum = 0;

    for (i = 0; i < NOP_SAMPLES; i++) {
        uint64_t t = AV_READ_TIME();
        nops[i] = AV_READ_TIME() - t;
    }

    qsort(nops, NOP_SAMPLES, sizeof(uint16_t), cmp_nop);
    for (i = NOP_FIRST; i < NOP_LAST; i++)
        nop_sum += nops[i];

    /* 5000 samples summed; dividing by 500 gives ten times the mean */
    return nop_sum / 500;
}
#endif
230 /* Print benchmark results */
231 static void print_benchs(CheckasmFunc *f)
234 print_benchs(f->child[0]);
236 /* Only print functions with at least one assembly version */
237 if (f->versions.cpu || f->versions.next) {
238 CheckasmFuncVersion *v = &f->versions;
241 int decicycles = (10*v->cycles/v->iterations - state.nop_time) / 4;
242 printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10);
244 } while ((v = v->next));
247 print_benchs(f->child[1]);
/* ASCII digit test that does not depend on the current locale */
static int is_ascii_digit(int c)
{
    return c >= '0' && c <= '9';
}

/* ASCIIbetical sort except preserving natural order for numbers.
 *
 * Returns <0, 0 or >0 like strcmp(). Strings are compared byte-wise (as
 * unsigned char) up to the first difference. If that difference lies inside
 * or directly after a run of digits, the string with the longer digit run
 * (the larger number) sorts last, e.g. "f9" < "f10". The digit rule applies
 * only when the preceding character is a digit; otherwise plain ASCII order
 * is used, so "a1" < "ab". */
static int cmp_func_names(const char *a, const char *b)
{
    const char *start = a;
    int ascii_diff, digit_diff;

    /* Skip the common prefix, remembering the first differing byte pair */
    for (; !(ascii_diff = *(const unsigned char*)a - *(const unsigned char*)b) && *a; a++, b++);
    /* Skip the remaining digits both numbers have in common length-wise */
    for (; is_ascii_digit(*a) && is_ascii_digit(*b); a++, b++);

    if (a > start && is_ascii_digit(a[-1]) &&
        (digit_diff = is_ascii_digit(*a) - is_ascii_digit(*b)))
        return digit_diff;

    return ascii_diff;
}
263 /* Get a node with the specified name, creating it if it doesn't exist */
264 static CheckasmFunc *get_func(const char *name, int length)
266 CheckasmFunc *f, **f_ptr = &state.funcs;
268 /* Search the tree for a matching node */
269 while ((f = *f_ptr)) {
270 int cmp = cmp_func_names(name, f->name);
274 f_ptr = &f->child[(cmp > 0)];
277 /* Allocate and insert a new node into the tree */
278 f = *f_ptr = checkasm_malloc(sizeof(CheckasmFunc) + length);
279 memcpy(f->name, name, length+1);
284 /* Perform tests and benchmarks for the specified cpu flag if supported by the host */
285 static void check_cpu_flag(const char *name, int flag)
287 int old_cpu_flag = state.cpu_flag;
289 flag |= old_cpu_flag;
290 av_set_cpu_flags_mask(flag);
291 state.cpu_flag = av_get_cpu_flags();
293 if (!flag || state.cpu_flag != old_cpu_flag) {
296 state.cpu_flag_name = name;
297 for (i = 0; tests[i]; i++)
302 /* Print the name of the current CPU flag, but only do it once */
303 static void print_cpu_name(void)
305 if (state.cpu_flag_name) {
306 color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name);
307 state.cpu_flag_name = NULL;
311 int main(int argc, char *argv[])
313 int i, seed, ret = 0;
315 if (!tests[0] || !cpus[0].flag) {
316 fprintf(stderr, "checkasm: no tests to perform\n");
320 if (argc > 1 && !strncmp(argv[1], "--bench", 7)) {
322 fprintf(stderr, "checkasm: --bench is not supported on your system\n");
325 if (argv[1][7] == '=') {
326 state.bench_pattern = argv[1] + 8;
327 state.bench_pattern_len = strlen(state.bench_pattern);
329 state.bench_pattern = "";
335 seed = (argc > 1) ? atoi(argv[1]) : av_get_random_seed();
336 fprintf(stderr, "checkasm: using random seed %u\n", seed);
337 av_lfg_init(&checkasm_lfg, seed);
339 check_cpu_flag(NULL, 0);
340 for (i = 0; cpus[i].flag; i++)
341 check_cpu_flag(cpus[i].name, cpus[i].flag);
343 if (state.num_failed) {
344 fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked);
347 fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
349 if (state.bench_pattern) {
350 state.nop_time = measure_nop_time();
351 printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
352 print_benchs(state.funcs);
357 destroy_func_tree(state.funcs);
361 /* Decide whether or not the specified function needs to be tested and
362 * allocate/initialize data structures if needed. Returns a pointer to a
363 * reference function if the function should be tested, otherwise NULL */
364 intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))()
367 intptr_t (*ref)() = func;
368 CheckasmFuncVersion *v;
373 name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
376 if (!func || name_length <= 0 || name_length >= sizeof(name_buf))
379 state.current_func = get_func(name_buf, name_length);
380 v = &state.current_func->versions;
383 CheckasmFuncVersion *prev;
385 /* Only test functions that haven't already been tested */
393 } while ((v = v->next));
395 v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion));
400 v->cpu = state.cpu_flag;
401 state.current_func_ver = v;
409 /* Decide whether or not the current function needs to be benchmarked */
410 int checkasm_bench_func(void)
412 return !state.num_failed && state.bench_pattern &&
413 !strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len);
416 /* Indicate that the current test has failed */
417 void checkasm_fail_func(const char *msg, ...)
419 if (state.current_func_ver->cpu && state.current_func_ver->ok) {
423 fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu));
425 vfprintf(stderr, msg, arg);
427 fprintf(stderr, ")\n");
429 state.current_func_ver->ok = 0;
434 /* Update benchmark results of the current function */
435 void checkasm_update_bench(int iterations, uint64_t cycles)
437 state.current_func_ver->iterations += iterations;
438 state.current_func_ver->cycles += cycles;
441 /* Print the outcome of all tests performed since the last time this function was called */
442 void checkasm_report(const char *name, ...)
444 static int prev_checked, prev_failed, max_length;
446 if (state.num_checked > prev_checked) {
450 int pad_length = max_length;
453 fprintf(stderr, " - ");
455 pad_length -= vfprintf(stderr, name, arg);
457 fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '[');
459 fprintf(stderr, " - %-*s [", max_length, state.current_func->name);
461 if (state.num_failed == prev_failed)
462 color_printf(COLOR_GREEN, "OK");
464 color_printf(COLOR_RED, "FAILED");
465 fprintf(stderr, "]\n");
467 prev_checked = state.num_checked;
468 prev_failed = state.num_failed;
469 } else if (!state.cpu_flag) {
472 /* Calculate the amount of padding required to make the output vertically aligned */
476 length = vsnprintf(NULL, 0, name, arg);
479 length = strlen(state.current_func->name);
481 if (length > max_length)