git.sesse.net Git - nageru/blob - futatabi/gpu_timers.cpp
Reintroduce faster DeckLink shutdown; now with a fix for the UI switcher.
[nageru] / futatabi / gpu_timers.cpp
1 #include "gpu_timers.h"
2
3 #include <epoxy/gl.h>
4
5 using namespace std;
6
// Master switch for GPU timing. While false, GPUTimers::begin_timer() creates
// no GL query objects and returns a dummy (0, 0) pair.
bool enable_timing{false};

// When true, GPUTimers::print() also lists timers at level 4 and deeper, uses
// three decimals, and reports parent time not covered by direct sub-timers.
bool detailed_timing{false};

// NOTE(review): not read anywhere in this file; presumably consulted by the
// code that drives the timers -- confirm against its users.
bool in_warmup{false};
10
11 pair<GLuint, GLuint> GPUTimers::begin_timer(const string &name, int level)
12 {
13         if (!enable_timing) {
14                 return make_pair(0, 0);
15         }
16
17         GLuint queries[2];
18         glGenQueries(2, queries);
19         glQueryCounter(queries[0], GL_TIMESTAMP);
20
21         Timer timer;
22         timer.name = name;
23         timer.level = level;
24         timer.query.first = queries[0];
25         timer.query.second = queries[1];
26         timers.push_back(timer);
27         return timer.query;
28 }
29
30 GLint64 find_elapsed(pair<GLuint, GLuint> queries)
31 {
32         // NOTE: This makes the CPU wait for the GPU.
33         GLuint64 time_start, time_end;
34         glGetQueryObjectui64v(queries.first, GL_QUERY_RESULT, &time_start);
35         glGetQueryObjectui64v(queries.second, GL_QUERY_RESULT, &time_end);
36         return time_end - time_start;
37 }
38
39 void GPUTimers::print()
40 {
41         for (size_t i = 0; i < timers.size(); ++i) {
42                 if (timers[i].level >= 4 && !detailed_timing) {
43                         // In practice, only affects the SOR sub-timers.
44                         continue;
45                 }
46
47                 GLint64 time_elapsed = find_elapsed(timers[i].query);
48                 for (int j = 0; j < timers[i].level * 2; ++j) {
49                         fprintf(stderr, " ");
50                 }
51
52                 if (detailed_timing) {
53                         // Look for any immediate subtimers, and see if they sum to the large one.
54                         size_t num_subtimers = 0;
55                         GLint64 sum_subtimers = 0;
56                         for (size_t j = i + 1; j < timers.size() && timers[j].level > timers[i].level; ++j) {
57                                 if (timers[j].level != timers[i].level + 1)
58                                         continue;
59                                 ++num_subtimers;
60                                 sum_subtimers += find_elapsed(timers[j].query);
61                         }
62
63                         if (num_subtimers > 0 && (time_elapsed - sum_subtimers) / 1e6 >= 0.01) {
64                                 fprintf(stderr, "%-30s %4.3f ms [%4.3f ms unaccounted for]\n", timers[i].name.c_str(), time_elapsed / 1e6, (time_elapsed - sum_subtimers) / 1e6);
65                         } else {
66                                 fprintf(stderr, "%-30s %4.3f ms\n", timers[i].name.c_str(), time_elapsed / 1e6);
67                         }
68                 } else {
69                         fprintf(stderr, "%-30s %4.1f ms\n", timers[i].name.c_str(), time_elapsed / 1e6);
70                 }
71         }
72 }