15 #include "alpha_division_effect.h"
16 #include "alpha_multiplication_effect.h"
17 #include "colorspace_conversion_effect.h"
18 #include "dither_effect.h"
20 #include "effect_chain.h"
21 #include "effect_util.h"
22 #include "gamma_compression_effect.h"
23 #include "gamma_expansion_effect.h"
26 #include "resource_pool.h"
28 #include "ycbcr_conversion_effect.h"
30 using namespace Eigen;
37 // An effect whose only purpose is to sit in a phase on its own and take the
38 // texture output from a compute shader and display it to the normal backbuffer
39 // (or any FBO). That phase can be skipped when rendering using render_to_textures().
40 class ComputeShaderOutputDisplayEffect : public Effect {
42 ComputeShaderOutputDisplayEffect() {}
// Type identifier. EffectChain::compile_glsl_program() compares against this
// exact string to recognize the phase that feeds this effect.
43 string effect_type_id() const override { return "ComputeShaderOutputDisplayEffect"; }
// The fragment shader source is whatever read_file() returns for identity.frag.
44 string output_fragment_shader() override { return read_file("identity.frag"); }
// Always requests a texture bounce for its input.
45 bool needs_texture_bounce() const override { return true; }
// Constructs a chain with the given aspect ratio (aspect_nom / aspect_denom).
// No output is registered initially (output_color_rgba is false and
// num_output_color_ycbcr is 0). The intermediate format starts out as
// GL_RGBA16F with no framebuffer transformation, the output origin as
// bottom-left, and phase timing disabled.
//
// resource_pool may be nullptr, in which case the chain allocates its own
// ResourcePool and records that it owns it.
50 EffectChain::EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool)
51 : aspect_nom(aspect_nom),
52 aspect_denom(aspect_denom),
53 output_color_rgba(false),
54 num_output_color_ycbcr(0),
55 dither_effect(nullptr),
56 ycbcr_conversion_effect_node(nullptr),
57 intermediate_format(GL_RGBA16F),
58 intermediate_transformation(NO_FRAMEBUFFER_TRANSFORMATION),
60 output_origin(OUTPUT_ORIGIN_BOTTOM_LEFT),
62 resource_pool(resource_pool),
63 do_phase_timing(false) {
// No pool supplied: create a private one and remember to take responsibility for it.
64 if (resource_pool == nullptr) {
65 this->resource_pool = new ResourcePool();
66 owns_resource_pool = true;
// NOTE(review): presumably the else branch (caller-supplied pool); the line
// introducing it is not visible here -- confirm.
68 owns_resource_pool = false;
71 // Generate a VBO with some data in (shared position and texture coordinate data).
// NOTE(review): `vertices` is defined on lines not visible here.
77 vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices);
// Tears down the chain: deletes the effect held by every node, hands each
// phase's GLSL program back to the resource pool, and deletes the VBO.
80 EffectChain::~EffectChain()
82 for (unsigned i = 0; i < nodes.size(); ++i) {
83 delete nodes[i]->effect;
86 for (unsigned i = 0; i < phases.size(); ++i) {
87 resource_pool->release_glsl_program(phases[i]->glsl_program_num);
// NOTE(review): the body of this branch is not visible here; presumably it
// frees the pool allocated by the constructor -- confirm.
90 if (owns_resource_pool) {
93 glDeleteBuffers(1, &vbo);
// Registers an input with the chain by appending it to `inputs`.
// NOTE(review): the remaining statements (including the return) are not
// visible here, so nothing more is documented about them.
97 Input *EffectChain::add_input(Input *input)
100 inputs.push_back(input);
// Declares the RGBA output of the chain.
//
// format:       image format stored in output_format.
// alpha_format: alpha representation stored in output_alpha_format.
//
// Asserts that no RGBA output has been added before, then marks RGBA output
// as enabled.
105 void EffectChain::add_output(const ImageFormat &format, OutputAlphaFormat alpha_format)
108 assert(!output_color_rgba);
109 output_format = format;
110 output_alpha_format = alpha_format;
111 output_color_rgba = true;
// Adds a Y'CbCr output to the chain. At most two such outputs may be added
// (asserted). output_format and output_alpha_format are overwritten on every
// call. The splitting mode is stored per output in output_ycbcr_splitting[].
// NOTE(review): part of the parameter list (including `output_type`, which is
// used below) is on lines not visible here.
114 void EffectChain::add_ycbcr_output(const ImageFormat &format, OutputAlphaFormat alpha_format,
115 const YCbCrFormat &ycbcr_format, YCbCrOutputSplitting output_splitting,
119 assert(num_output_color_ycbcr < 2);
120 output_format = format;
121 output_alpha_format = alpha_format;
// Adding a second output: it must agree with the first one in luma
// coefficients, range, number of levels and output type, and the stored
// format must not use chroma subsampling.
123 if (num_output_color_ycbcr == 1) {
124 // Check that the format is the same.
125 assert(output_ycbcr_format.luma_coefficients == ycbcr_format.luma_coefficients);
126 assert(output_ycbcr_format.full_range == ycbcr_format.full_range);
127 assert(output_ycbcr_format.num_levels == ycbcr_format.num_levels);
128 assert(output_ycbcr_format.chroma_subsampling_x == 1);
129 assert(output_ycbcr_format.chroma_subsampling_y == 1);
130 assert(output_ycbcr_type == output_type);
// NOTE(review): presumably the else branch (first output) -- the line
// introducing it is not visible here.
132 output_ycbcr_format = ycbcr_format;
133 output_ycbcr_type = output_type;
// Record the splitting mode for this output and bump the output count.
135 output_ycbcr_splitting[num_output_color_ycbcr++] = output_splitting;
// The requested format itself must not use chroma subsampling.
137 assert(ycbcr_format.chroma_subsampling_x == 1);
138 assert(ycbcr_format.chroma_subsampling_y == 1);
// Replaces the stored Y'CbCr output format.
// Asserts that at least one Y'CbCr output exists and that the currently
// stored format has no chroma subsampling.
141 void EffectChain::change_ycbcr_output_format(const YCbCrFormat &ycbcr_format)
143 assert(num_output_color_ycbcr > 0);
144 assert(output_ycbcr_format.chroma_subsampling_x == 1);
145 assert(output_ycbcr_format.chroma_subsampling_y == 1);
147 output_ycbcr_format = ycbcr_format;
// Forward the new format to the effect held by ycbcr_conversion_effect_node.
// NOTE(review): any guard around these two lines (e.g. a check that the node
// exists yet) is not visible here -- confirm.
149 YCbCrConversionEffect *effect = (YCbCrConversionEffect *)(ycbcr_conversion_effect_node->effect);
150 effect->change_output_format(ycbcr_format);
// Wraps `effect` in a newly allocated Node and registers it with the chain.
// Asserts that the effect is not already held by any existing node.
// NOTE(review): the return statement is not visible here.
154 Node *EffectChain::add_node(Effect *effect)
156 for (unsigned i = 0; i < nodes.size(); ++i) {
157 assert(nodes[i]->effect != effect);
// The new node starts enabled, with color space, gamma curve and alpha type
// all set to their INVALID markers, no mipmap requirement, and both
// one-to-one sampling flags cleared.
160 Node *node = new Node;
161 node->effect = effect;
162 node->disabled = false;
163 node->output_color_space = COLORSPACE_INVALID;
164 node->output_gamma_curve = GAMMA_INVALID;
165 node->output_alpha_type = ALPHA_INVALID;
166 node->needs_mipmaps = Effect::DOES_NOT_NEED_MIPMAPS;
167 node->one_to_one_sampling = false;
168 node->strong_one_to_one_sampling = false;
// Register in both the ordered list and the effect -> node lookup map, then
// notify the effect that it now belongs to this chain.
170 nodes.push_back(node);
171 node_map[effect] = node;
172 effect->inform_added(this);
// Adds a directed edge sender -> receiver by appending to both adjacency
// lists (sender's outgoing links and receiver's incoming links).
176 void EffectChain::connect_nodes(Node *sender, Node *receiver)
178 sender->outgoing_links.push_back(receiver);
179 receiver->incoming_links.push_back(sender);
// Moves all incoming links of old_receiver over to new_receiver.
// new_receiver's previous incoming links are overwritten, and old_receiver
// ends up with none.
182 void EffectChain::replace_receiver(Node *old_receiver, Node *new_receiver)
184 new_receiver->incoming_links = old_receiver->incoming_links;
185 old_receiver->incoming_links.clear();
// Fix up the other end of each moved edge: every sender that pointed at
// old_receiver now points at new_receiver instead.
187 for (unsigned i = 0; i < new_receiver->incoming_links.size(); ++i) {
188 Node *sender = new_receiver->incoming_links[i];
189 for (unsigned j = 0; j < sender->outgoing_links.size(); ++j) {
190 if (sender->outgoing_links[j] == old_receiver) {
191 sender->outgoing_links[j] = new_receiver;
// Moves all outgoing links of old_sender over to new_sender.
// new_sender's previous outgoing links are overwritten, and old_sender
// ends up with none.
197 void EffectChain::replace_sender(Node *old_sender, Node *new_sender)
199 new_sender->outgoing_links = old_sender->outgoing_links;
200 old_sender->outgoing_links.clear();
// Fix up the other end of each moved edge: every receiver that listed
// old_sender as an input now lists new_sender instead.
202 for (unsigned i = 0; i < new_sender->outgoing_links.size(); ++i) {
203 Node *receiver = new_sender->outgoing_links[i];
204 for (unsigned j = 0; j < receiver->incoming_links.size(); ++j) {
205 if (receiver->incoming_links[j] == old_sender) {
206 receiver->incoming_links[j] = new_sender;
// Splices `middle` into the edge(s) going from `sender` to `receiver`.
212 void EffectChain::insert_node_between(Node *sender, Node *middle, Node *receiver)
// Redirect each sender -> receiver link to middle, and record sender as an
// input of middle once per redirected link.
214 for (unsigned i = 0; i < sender->outgoing_links.size(); ++i) {
215 if (sender->outgoing_links[i] == receiver) {
216 sender->outgoing_links[i] = middle;
217 middle->incoming_links.push_back(sender);
// Likewise on the receiving side: receiver now reads from middle, and middle
// gets receiver as an outgoing link once per redirected link.
220 for (unsigned i = 0; i < receiver->incoming_links.size(); ++i) {
221 if (receiver->incoming_links[i] == sender) {
222 receiver->incoming_links[i] = middle;
223 middle->outgoing_links.push_back(receiver);
// Sanity check: middle must end up with exactly as many inputs as its effect declares.
227 assert(middle->incoming_links.size() == middle->effect->num_inputs());
// Returns the GL texture unit enum (GL_TEXTURE0 + n) for input number
// `input_num` of `node`, where n is the bound_sampler_num of that input node.
// Asserts that the node's effect needs a texture bounce, that input_num is
// in range, and that the sampler number lies in [0, 8).
230 GLenum EffectChain::get_input_sampler(Node *node, unsigned input_num) const
232 assert(node->effect->needs_texture_bounce());
233 assert(input_num < node->incoming_links.size());
234 assert(node->incoming_links[input_num]->bound_sampler_num >= 0);
235 assert(node->incoming_links[input_num]->bound_sampler_num < 8);
236 return GL_TEXTURE0 + node->incoming_links[input_num]->bound_sampler_num;
// Reports whether input number `input_num` of `node` has a sampler number in
// [0, 8), i.e. the same range get_input_sampler() asserts on.
// Asserts that input_num is in range.
// NOTE(review): the declared return type is GLenum although the returned
// expression is a boolean (so the result is 0 or 1).
239 GLenum EffectChain::has_input_sampler(Node *node, unsigned input_num) const
241 assert(input_num < node->incoming_links.size());
242 return node->incoming_links[input_num]->bound_sampler_num >= 0 &&
243 node->incoming_links[input_num]->bound_sampler_num < 8;
// Recursively walks the incoming links of `node` and appends nodes whose
// effect has no inputs to *nonlinear_inputs.
246 void EffectChain::find_all_nonlinear_inputs(Node *node, vector<Node *> *nonlinear_inputs)
// Special case: a node that already outputs linear gamma and is not a
// GammaCompressionEffect.
// NOTE(review): the statement guarded by this condition is not visible here;
// presumably it stops the search at this node -- confirm.
248 if (node->output_gamma_curve == GAMMA_LINEAR &&
249 node->effect->effect_type_id() != "GammaCompressionEffect") {
// A node without inputs is a leaf of the search and gets recorded.
252 if (node->effect->num_inputs() == 0) {
253 nonlinear_inputs->push_back(node);
// Otherwise, descend into every input of this node.
255 assert(node->effect->num_inputs() == node->incoming_links.size());
256 for (unsigned i = 0; i < node->incoming_links.size(); ++i) {
257 find_all_nonlinear_inputs(node->incoming_links[i], nonlinear_inputs);
// Adds `effect` to the chain, taking its inputs from the given effects.
// Asserts that the number of inputs matches effect->num_inputs() and that
// every input effect has already been added to the chain (is in node_map).
// NOTE(review): the return statement is not visible here.
262 Effect *EffectChain::add_effect(Effect *effect, const vector<Effect *> &inputs)
265 assert(inputs.size() == effect->num_inputs());
266 Node *node = add_node(effect);
// Wire each input's node to the new node, in the order given.
267 for (unsigned i = 0; i < inputs.size(); ++i) {
268 assert(node_map.count(inputs[i]) != 0);
269 connect_nodes(node_map[inputs[i]], node);
274 // ESSL doesn't support token pasting. Replace PREFIX(x) with <effect_id>_x.
// Scans `text` for occurrences of "PREFIX(" and builds the result in `output`.
// NOTE(review): the declarations of `output` and `start`, the parenthesis
// depth tracking, and the loop exits are on lines not visible here; comments
// below cover only what is shown.
275 string replace_prefix(const string &text, const string &prefix)
280 while (start < text.size()) {
281 size_t pos = text.find("PREFIX(", start);
// No further occurrence: copy the remainder of the text verbatim.
282 if (pos == string::npos) {
283 output.append(text.substr(start, string::npos));
// Copy everything before the occurrence, then emit the prefix in its place.
287 output.append(text.substr(start, pos - start));
288 output.append(prefix);
// Skip past the "PREFIX(" token itself.
291 pos += strlen("PREFIX(");
293 // Output stuff until we find the matching ), which we then eat.
295 size_t end_arg_pos = pos;
296 while (end_arg_pos < text.size()) {
297 if (text[end_arg_pos] == '(') {
299 } else if (text[end_arg_pos] == ')') {
// Emit the argument text between "PREFIX(" and the position where the scan stopped.
307 output.append(text.substr(pos, end_arg_pos - pos));
// For every uniform in effect_uniforms: copies it into *phase_uniforms with
// its prefix set to effect_id, and appends a GLSL declaration of the form
// "uniform <type_specifier> <effect_id>_<name>;" to *glsl_string.
// NOTE(review): the template header and the glsl_string parameter are
// declared on lines not visible here.
318 void extract_uniform_declarations(const vector<Uniform<T>> &effect_uniforms,
319 const string &type_specifier,
320 const string &effect_id,
321 vector<Uniform<T>> *phase_uniforms,
324 for (unsigned i = 0; i < effect_uniforms.size(); ++i) {
325 phase_uniforms->push_back(effect_uniforms[i]);
326 phase_uniforms->back().prefix = effect_id;
328 *glsl_string += string("uniform ") + type_specifier + " " + effect_id
329 + "_" + effect_uniforms[i].name + ";\n";
// Array counterpart of extract_uniform_declarations(): copies every uniform
// into *phase_uniforms with its prefix set to effect_id, and formats a GLSL
// declaration of the form
// "uniform <type_specifier> <effect_id>_<name>[<num_values>];" into `buf`.
// NOTE(review): the declaration of `buf` and what is done with it after
// formatting are on lines not visible here.
334 void extract_uniform_array_declarations(const vector<Uniform<T>> &effect_uniforms,
335 const string &type_specifier,
336 const string &effect_id,
337 vector<Uniform<T>> *phase_uniforms,
340 for (unsigned i = 0; i < effect_uniforms.size(); ++i) {
341 phase_uniforms->push_back(effect_uniforms[i]);
342 phase_uniforms->back().prefix = effect_id;
345 snprintf(buf, sizeof(buf), "uniform %s %s_%s[%d];\n",
346 type_specifier.c_str(), effect_id.c_str(),
347 effect_uniforms[i].name.c_str(),
348 int(effect_uniforms[i].num_values));
// Fills in the `location` field of every uniform in *phase_uniforms by
// calling get_uniform_location() with the uniform's prefix and name on the
// given GLSL program.
354 void collect_uniform_locations(GLuint glsl_program_num, vector<Uniform<T>> *phase_uniforms)
356 for (unsigned i = 0; i < phase_uniforms->size(); ++i) {
357 Uniform<T> &uniform = (*phase_uniforms)[i];
358 uniform.location = get_uniform_location(glsl_program_num, uniform.prefix, uniform.name);
// Assembles the GLSL source for one phase and compiles it through the
// resource pool. The final source is header + uniform declarations + body.
// The body holds one sampling function per phase input, the code of every
// effect in the phase wrapped in per-effect #defines, output-related
// #defines, and a footer. After compiling, the attribute indexes and the
// locations of all uniforms are recorded on the phase.
// NOTE(review): several lines (local buffer declarations, else/closing
// braces) are not visible here; comments describe only what is shown.
364 void EffectChain::compile_glsl_program(Phase *phase)
// The header differs between compute-shader phases and regular phases.
366 string frag_shader_header;
367 if (phase->is_compute_shader) {
368 frag_shader_header = read_file("header.comp");
370 frag_shader_header = read_version_dependent_file("header", "frag");
372 string frag_shader = "";
374 // Create functions and uniforms for all the texture inputs that we need.
375 for (unsigned i = 0; i < phase->inputs.size(); ++i) {
376 Node *input = phase->inputs[i]->output_node;
// Inputs coming from other phases get the IDs in0, in1, ...
378 sprintf(effect_id, "in%u", i);
379 phase->effect_ids.insert(make_pair(make_pair(input, IN_ANOTHER_PHASE), effect_id));
381 frag_shader += string("uniform sampler2D tex_") + effect_id + ";\n";
382 frag_shader += string("vec4 ") + effect_id + "(vec2 tc) {\n";
383 frag_shader += "\tvec4 tmp = tex2D(tex_" + string(effect_id) + ", tc);\n";
// With the square-root framebuffer transformation active, linear-gamma
// inputs are squared when read back.
385 if (intermediate_transformation == SQUARE_ROOT_FRAMEBUFFER_TRANSFORMATION &&
386 phase->inputs[i]->output_node->output_gamma_curve == GAMMA_LINEAR) {
387 frag_shader += "\ttmp.rgb *= tmp.rgb;\n";
390 frag_shader += "\treturn tmp;\n";
391 frag_shader += "}\n";
// Register the sampler uniform tex_<effect_id>; its value is read from
// phase->input_samplers[i], and its location is resolved after compilation.
394 Uniform<int> uniform;
395 uniform.name = effect_id;
396 uniform.value = &phase->input_samplers[i];
397 uniform.prefix = "tex";
398 uniform.num_values = 1;
399 uniform.location = -1;
400 phase->uniforms_sampler2d.push_back(uniform);
403 // Give each effect in the phase its own ID.
404 for (unsigned i = 0; i < phase->effects.size(); ++i) {
405 Node *node = phase->effects[i];
407 sprintf(effect_id, "eff%u", i);
408 bool inserted = phase->effect_ids.insert(make_pair(make_pair(node, IN_SAME_PHASE), effect_id)).second;
// Emit the shader code of each effect, in the order of phase->effects.
// Around each one, INPUT (or INPUT1, INPUT2, ...) and FUNCNAME are #defined
// so the effect's code can refer to its inputs and its own function name.
412 for (unsigned i = 0; i < phase->effects.size(); ++i) {
413 Node *node = phase->effects[i];
414 const string effect_id = phase->effect_ids[make_pair(node, IN_SAME_PHASE)];
415 if (node->incoming_links.size() == 1) {
416 Node *input = node->incoming_links[0];
417 NodeLinkType link_type = node->incoming_link_type[0];
418 if (i != 0 && input->effect->is_compute_shader()) {
419 // First effect after the compute shader reads the value
420 // that cs_output() wrote to a global variable.
421 frag_shader += string("#define INPUT(tc) CS_OUTPUT_VAL\n");
423 frag_shader += string("#define INPUT ") + phase->effect_ids[make_pair(input, link_type)] + "\n";
// Numbered INPUTn defines, one per incoming link; none of them may come
// from a compute shader (asserted).
426 for (unsigned j = 0; j < node->incoming_links.size(); ++j) {
427 assert(!node->incoming_links[j]->effect->is_compute_shader());
429 string effect_id = phase->effect_ids[make_pair(node->incoming_links[j], node->incoming_link_type[j])];
430 sprintf(buf, "#define INPUT%d %s\n", j + 1, effect_id.c_str());
436 frag_shader += string("#define FUNCNAME ") + effect_id + "\n";
// Compute shaders additionally get a macro mapping coordinates through the
// effect's inv_output_size and output_texcoord_adjust uniforms.
437 if (node->effect->is_compute_shader()) {
438 frag_shader += string("#define NORMALIZE_TEXTURE_COORDS(tc) ((tc) * ") + effect_id + "_inv_output_size + " + effect_id + "_output_texcoord_adjust)\n";
440 frag_shader += replace_prefix(node->effect->output_fragment_shader(), effect_id);
// Undo the per-effect defines so they do not leak into the next effect.
441 frag_shader += "#undef FUNCNAME\n";
442 if (node->incoming_links.size() == 1) {
443 frag_shader += "#undef INPUT\n";
445 for (unsigned j = 0; j < node->incoming_links.size(); ++j) {
447 sprintf(buf, "#undef INPUT%d\n", j + 1);
// Define what the footer sees as INPUT: the compute shader node for compute
// phases (plus CS_POSTPROC for anything placed after it), or the last effect
// of the phase.
453 if (phase->is_compute_shader) {
454 frag_shader += string("#define INPUT ") + phase->effect_ids[make_pair(phase->compute_shader_node, IN_SAME_PHASE)] + "\n";
455 if (phase->compute_shader_node == phase->effects.back()) {
456 // No postprocessing.
457 frag_shader += "#define CS_POSTPROC(tc) CS_OUTPUT_VAL\n";
459 frag_shader += string("#define CS_POSTPROC ") + phase->effect_ids[make_pair(phase->effects.back(), IN_SAME_PHASE)] + "\n";
462 frag_shader += string("#define INPUT ") + phase->effect_ids[make_pair(phase->effects.back(), IN_SAME_PHASE)] + "\n";
465 // If we're the last phase, add the right #defines for Y'CbCr multi-output as needed.
466 vector<string> frag_shader_outputs; // In order.
467 if (phase->output_node->outgoing_links.empty() && num_output_color_ycbcr > 0) {
// First Y'CbCr output: one, two or three shader outputs depending on splitting.
468 switch (output_ycbcr_splitting[0]) {
469 case YCBCR_OUTPUT_INTERLEAVED:
471 frag_shader_outputs.push_back("FragColor");
473 case YCBCR_OUTPUT_SPLIT_Y_AND_CBCR:
474 frag_shader += "#define YCBCR_OUTPUT_SPLIT_Y_AND_CBCR 1\n";
475 frag_shader_outputs.push_back("Y");
476 frag_shader_outputs.push_back("Chroma");
478 case YCBCR_OUTPUT_PLANAR:
479 frag_shader += "#define YCBCR_OUTPUT_PLANAR 1\n";
480 frag_shader_outputs.push_back("Y");
481 frag_shader_outputs.push_back("Cb");
482 frag_shader_outputs.push_back("Cr");
// Optional second Y'CbCr output, with its own set of output names.
488 if (num_output_color_ycbcr > 1) {
489 switch (output_ycbcr_splitting[1]) {
490 case YCBCR_OUTPUT_INTERLEAVED:
491 frag_shader += "#define SECOND_YCBCR_OUTPUT_INTERLEAVED 1\n";
492 frag_shader_outputs.push_back("YCbCr2");
494 case YCBCR_OUTPUT_SPLIT_Y_AND_CBCR:
495 frag_shader += "#define SECOND_YCBCR_OUTPUT_SPLIT_Y_AND_CBCR 1\n";
496 frag_shader_outputs.push_back("Y2");
497 frag_shader_outputs.push_back("Chroma2");
499 case YCBCR_OUTPUT_PLANAR:
500 frag_shader += "#define SECOND_YCBCR_OUTPUT_PLANAR 1\n";
501 frag_shader_outputs.push_back("Y2");
502 frag_shader_outputs.push_back("Cb2");
503 frag_shader_outputs.push_back("Cr2");
510 if (output_color_rgba) {
511 // Note: Needs to come in the header, because not only the
512 // output needs to see it (YCbCrConversionEffect and DitherEffect
514 frag_shader_header += "#define YCBCR_ALSO_OUTPUT_RGBA 1\n";
515 frag_shader_outputs.push_back("RGBA");
519 // If we're bouncing to a temporary texture, signal transformation if desired.
520 if (!phase->output_node->outgoing_links.empty()) {
521 if (intermediate_transformation == SQUARE_ROOT_FRAMEBUFFER_TRANSFORMATION &&
522 phase->output_node->output_gamma_curve == GAMMA_LINEAR) {
523 frag_shader += "#define SQUARE_ROOT_TRANSFORMATION 1\n";
// Append the footer. For compute phases, the compute shader effect also gets
// three extra uniforms registered (output_size, inv_output_size,
// output_texcoord_adjust), backed by fields of the phase.
527 if (phase->is_compute_shader) {
528 frag_shader.append(read_file("footer.comp"));
529 phase->compute_shader_node->effect->register_uniform_ivec2("output_size", phase->uniform_output_size);
530 phase->compute_shader_node->effect->register_uniform_vec2("inv_output_size", (float *)&phase->inv_output_size);
531 phase->compute_shader_node->effect->register_uniform_vec2("output_texcoord_adjust", (float *)&phase->output_texcoord_adjust);
533 frag_shader.append(read_file("footer.frag"));
536 // Collect uniforms from all effects and output them. Note that this needs
537 // to happen after output_fragment_shader(), even though the uniforms come
538 // before in the output source, since output_fragment_shader() is allowed
539 // to register new uniforms (e.g. arrays that are of unknown length until
540 // finalization time).
541 // TODO: Make a uniform block for platforms that support it.
542 string frag_shader_uniforms = "";
543 for (unsigned i = 0; i < phase->effects.size(); ++i) {
544 Node *node = phase->effects[i];
545 Effect *effect = node->effect;
546 const string effect_id = phase->effect_ids[make_pair(node, IN_SAME_PHASE)];
// Scalar/vector/matrix uniforms first; array uniforms are collected into the
// same phase-level lists as their non-array counterparts.
547 extract_uniform_declarations(effect->uniforms_image2d, "image2D", effect_id, &phase->uniforms_image2d, &frag_shader_uniforms);
548 extract_uniform_declarations(effect->uniforms_sampler2d, "sampler2D", effect_id, &phase->uniforms_sampler2d, &frag_shader_uniforms);
549 extract_uniform_declarations(effect->uniforms_bool, "bool", effect_id, &phase->uniforms_bool, &frag_shader_uniforms);
550 extract_uniform_declarations(effect->uniforms_int, "int", effect_id, &phase->uniforms_int, &frag_shader_uniforms);
551 extract_uniform_declarations(effect->uniforms_ivec2, "ivec2", effect_id, &phase->uniforms_ivec2, &frag_shader_uniforms);
552 extract_uniform_declarations(effect->uniforms_float, "float", effect_id, &phase->uniforms_float, &frag_shader_uniforms);
553 extract_uniform_declarations(effect->uniforms_vec2, "vec2", effect_id, &phase->uniforms_vec2, &frag_shader_uniforms);
554 extract_uniform_declarations(effect->uniforms_vec3, "vec3", effect_id, &phase->uniforms_vec3, &frag_shader_uniforms);
555 extract_uniform_declarations(effect->uniforms_vec4, "vec4", effect_id, &phase->uniforms_vec4, &frag_shader_uniforms);
556 extract_uniform_array_declarations(effect->uniforms_float_array, "float", effect_id, &phase->uniforms_float, &frag_shader_uniforms);
557 extract_uniform_array_declarations(effect->uniforms_vec2_array, "vec2", effect_id, &phase->uniforms_vec2, &frag_shader_uniforms);
558 extract_uniform_array_declarations(effect->uniforms_vec3_array, "vec3", effect_id, &phase->uniforms_vec3, &frag_shader_uniforms);
559 extract_uniform_array_declarations(effect->uniforms_vec4_array, "vec4", effect_id, &phase->uniforms_vec4, &frag_shader_uniforms);
560 extract_uniform_declarations(effect->uniforms_mat3, "mat3", effect_id, &phase->uniforms_mat3, &frag_shader_uniforms);
563 string vert_shader = read_version_dependent_file("vs", "vert");
565 // If we're the last phase and need to flip the picture to compensate for
566 // the origin, tell the vertex or compute shader so.
// NOTE(review): the declarations of is_last_phase and has_dummy_effect are
// not visible here.
568 if (has_dummy_effect) {
569 is_last_phase = (phase->output_node->outgoing_links.size() == 1 &&
570 phase->output_node->outgoing_links[0]->effect->effect_type_id() == "ComputeShaderOutputDisplayEffect");
572 is_last_phase = phase->output_node->outgoing_links.empty();
574 if (is_last_phase && output_origin == OUTPUT_ORIGIN_TOP_LEFT) {
575 if (phase->is_compute_shader) {
576 frag_shader_header += "#define FLIP_ORIGIN 1\n";
// For regular phases, patch the vertex shader in place: the final character
// of its "#define FLIP_ORIGIN 0" line is overwritten with '1'.
578 const string needle = "#define FLIP_ORIGIN 0";
579 size_t pos = vert_shader.find(needle);
580 assert(pos != string::npos);
582 vert_shader[pos + needle.size() - 1] = '1';
// Final source order: header, then uniform declarations, then the body.
586 frag_shader = frag_shader_header + frag_shader_uniforms + frag_shader;
588 if (phase->is_compute_shader) {
589 phase->glsl_program_num = resource_pool->compile_glsl_compute_program(frag_shader);
// Image uniform tex_outbuf, whose value is read from phase->outbuf_image_unit.
591 Uniform<int> uniform;
592 uniform.name = "outbuf";
593 uniform.value = &phase->outbuf_image_unit;
594 uniform.prefix = "tex";
595 uniform.num_values = 1;
596 uniform.location = -1;
597 phase->uniforms_image2d.push_back(uniform);
599 phase->glsl_program_num = resource_pool->compile_glsl_program(vert_shader, frag_shader, frag_shader_outputs);
// Remember which vertex attributes the program actually has; a location of
// -1 is skipped.
601 GLint position_attribute_index = glGetAttribLocation(phase->glsl_program_num, "position");
602 GLint texcoord_attribute_index = glGetAttribLocation(phase->glsl_program_num, "texcoord");
603 if (position_attribute_index != -1) {
604 phase->attribute_indexes.insert(position_attribute_index);
606 if (texcoord_attribute_index != -1) {
607 phase->attribute_indexes.insert(texcoord_attribute_index);
610 // Collect the resulting location numbers for each uniform.
611 collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_image2d);
612 collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_sampler2d);
613 collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_bool);
614 collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_int);
615 collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_ivec2);
616 collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_float);
617 collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_vec2);
618 collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_vec3);
619 collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_vec4);
620 collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_mat3);
623 // Construct GLSL programs, starting at the given effect and following
624 // the chain from there. We end a program every time we come to an effect
625 // marked as "needs texture bounce", one that is used by multiple other
626 // effects, every time we need to bounce due to output size change
627 // (not all size changes require ending), and of course at the end.
629 // We follow a quite simple depth-first search from the output, although
630 // without recursing explicitly within each phase.
//
// output:            the node whose result this phase produces.
// completed_effects: map from output node to its already-built phase; it is
//                    consulted first and updated at the end.
// NOTE(review): some lines (closing braces, else lines, the final return)
// are not visible here; comments describe only what is shown.
631 Phase *EffectChain::construct_phase(Node *output, map<Node *, Phase *> *completed_effects)
// A phase ending in this node has already been built; reuse it.
633 if (completed_effects->count(output)) {
634 return (*completed_effects)[output];
637 Phase *phase = new Phase;
638 phase->output_node = output;
639 phase->is_compute_shader = false;
640 phase->compute_shader_node = nullptr;
642 // If the output effect has one-to-one sampling, we try to trace this
643 // status down through the dependency chain. This is important in case
644 // we hit an effect that changes output size (and not sets a virtual
645 // output size); if we have one-to-one sampling, we don't have to break
647 output->one_to_one_sampling = output->effect->one_to_one_sampling();
648 output->strong_one_to_one_sampling = output->effect->strong_one_to_one_sampling();
650 // Effects that we have yet to calculate, but that we know should
651 // be in the current phase.
652 stack<Node *> effects_todo_this_phase;
653 effects_todo_this_phase.push(output);
655 while (!effects_todo_this_phase.empty()) {
656 Node *node = effects_todo_this_phase.top();
657 effects_todo_this_phase.pop();
// An effect reporting strong one-to-one sampling must also report regular
// one-to-one sampling.
659 assert(node->effect->one_to_one_sampling() >= node->effect->strong_one_to_one_sampling());
661 if (node->effect->needs_mipmaps() != Effect::DOES_NOT_NEED_MIPMAPS) {
662 // Can't have incompatible requirements imposed on us from a dependent effect;
663 // if so, it should have started a new phase instead.
664 assert(node->needs_mipmaps == Effect::DOES_NOT_NEED_MIPMAPS ||
665 node->needs_mipmaps == node->effect->needs_mipmaps());
666 node->needs_mipmaps = node->effect->needs_mipmaps();
669 // This should currently only happen for effects that are inputs
670 // (either true inputs or phase outputs). We special-case inputs,
671 // and then deduplicate phase outputs below.
672 if (node->effect->num_inputs() == 0) {
// NOTE(review): the action taken when the input is already part of this
// phase is not visible here; presumably it is skipped -- confirm.
673 if (find(phase->effects.begin(), phase->effects.end(), node) != phase->effects.end()) {
677 assert(completed_effects->count(node) == 0);
680 phase->effects.push_back(node);
// Remember the compute shader node, if this phase contains one.
681 if (node->effect->is_compute_shader()) {
682 phase->is_compute_shader = true;
683 phase->compute_shader_node = node;
686 // Find all the dependencies of this effect, and add them to the stack.
687 vector<Node *> deps = node->incoming_links;
688 assert(node->effect->num_inputs() == deps.size());
689 for (unsigned i = 0; i < deps.size(); ++i) {
690 bool start_new_phase = false;
// A bounce is forced when this effect needs one and the dependency is
// neither a single texture nor overriding the bounce.
692 if (node->effect->needs_texture_bounce() &&
693 !deps[i]->effect->is_single_texture() &&
694 !deps[i]->effect->override_disable_bounce()) {
695 start_new_phase = true;
698 // Propagate information about needing mipmaps down the chain,
699 // breaking the phase if we notice an incompatibility.
701 // Note that we cannot do this propagation as a normal pass,
702 // because it needs information about where the phases end
703 // (we should not propagate the flag across phases).
704 if (node->needs_mipmaps != Effect::DOES_NOT_NEED_MIPMAPS) {
705 // The node can have a value set (ie. not DOES_NOT_NEED_MIPMAPS)
706 // if we have diamonds in the graph; if so, choose that.
707 // If not, the effect on the node can also decide (this is the
708 // more common case).
709 Effect::MipmapRequirements dep_mipmaps = deps[i]->needs_mipmaps;
710 if (dep_mipmaps == Effect::DOES_NOT_NEED_MIPMAPS) {
// For inputs, the answer depends on whether the input can supply mipmaps;
// one that cannot is treated as CANNOT_ACCEPT_MIPMAPS.
711 if (deps[i]->effect->num_inputs() == 0) {
712 Input *input = static_cast<Input *>(deps[i]->effect);
713 dep_mipmaps = input->can_supply_mipmaps() ? Effect::DOES_NOT_NEED_MIPMAPS : Effect::CANNOT_ACCEPT_MIPMAPS;
715 dep_mipmaps = deps[i]->effect->needs_mipmaps();
// No opinion on the dependency's side: it inherits our requirement.
718 if (dep_mipmaps == Effect::DOES_NOT_NEED_MIPMAPS) {
719 deps[i]->needs_mipmaps = node->needs_mipmaps;
720 } else if (dep_mipmaps != node->needs_mipmaps) {
721 // The dependency cannot supply our mipmap demands
722 // (either because it's an input that can't do mipmaps,
723 // or because there's a conflict between mipmap-needing
724 // and mipmap-refusing effects somewhere in the graph),
725 // so they cannot be in the same phase.
726 start_new_phase = true;
730 if (deps[i]->outgoing_links.size() > 1) {
731 if (!deps[i]->effect->is_single_texture()) {
732 // More than one effect uses this as the input,
733 // and it is not a texture itself.
734 // The easiest thing to do (and probably also the safest
735 // performance-wise in most cases) is to bounce it to a texture
736 // and then let the next passes read from that.
737 start_new_phase = true;
739 assert(deps[i]->effect->num_inputs() == 0);
741 // For textures, we try to be slightly more clever;
742 // if none of our outputs need a bounce, we don't bounce
743 // but instead simply use the effect many times.
745 // Strictly speaking, we could bounce it for some outputs
746 // and use it directly for others, but the processing becomes
747 // somewhat simpler if the effect is only used in one such way.
748 for (unsigned j = 0; j < deps[i]->outgoing_links.size(); ++j) {
749 Node *rdep = deps[i]->outgoing_links[j];
750 start_new_phase |= rdep->effect->needs_texture_bounce();
755 if (deps[i]->effect->is_compute_shader()) {
756 // Only one compute shader per phase; we should have been stopped
757 // already due to the fact that compute shaders are not one-to-one.
758 assert(!phase->is_compute_shader);
760 // If all nodes so far are strong one-to-one, we can put them after
761 // the compute shader (ie., process them on the output).
// Note that this assignment overwrites any decision made above.
762 start_new_phase = !node->strong_one_to_one_sampling;
763 } else if (deps[i]->effect->sets_virtual_output_size()) {
764 assert(deps[i]->effect->changes_output_size());
765 // If the next effect sets a virtual size to rely on OpenGL's
766 // bilinear sampling, we'll really need to break the phase here.
767 start_new_phase = true;
768 } else if (deps[i]->effect->changes_output_size() && !node->one_to_one_sampling) {
769 // If the next effect changes size and we don't have one-to-one sampling,
770 // we also need to break here.
771 start_new_phase = true;
// Either build the dependency as a separate phase (recursively) and use it
// as an input of this phase, or queue it for inclusion in this one.
774 if (start_new_phase) {
775 phase->inputs.push_back(construct_phase(deps[i], completed_effects));
777 effects_todo_this_phase.push(deps[i]);
779 // Propagate the one-to-one status down through the dependency.
780 deps[i]->one_to_one_sampling = node->one_to_one_sampling &&
781 deps[i]->effect->one_to_one_sampling();
782 deps[i]->strong_one_to_one_sampling = node->strong_one_to_one_sampling &&
783 deps[i]->effect->strong_one_to_one_sampling();
// Record, per incoming link, whether it crosses a phase boundary;
// compile_glsl_program() uses this when looking up the input's ID.
786 node->incoming_link_type.push_back(start_new_phase ? IN_ANOTHER_PHASE : IN_SAME_PHASE);
790 // No more effects to do this phase. Take all the ones we have,
791 // and create a GLSL program for it.
792 assert(!phase->effects.empty());
794 // Deduplicate the inputs, but don't change the ordering e.g. by sorting;
795 // that would be nondeterministic and thus reduce cacheability.
796 // TODO: Make this even more deterministic.
797 vector<Phase *> dedup_inputs;
798 set<Phase *> seen_inputs;
799 for (size_t i = 0; i < phase->inputs.size(); ++i) {
// set::insert() reports whether the element was new; keep first occurrences only.
800 if (seen_inputs.insert(phase->inputs[i]).second) {
801 dedup_inputs.push_back(phase->inputs[i]);
804 swap(phase->inputs, dedup_inputs);
806 // Allocate samplers for each input.
807 phase->input_samplers.resize(phase->inputs.size());
809 // We added the effects from the output and back, but we need to output
810 // them in topological sort order in the shader.
811 phase->effects = topological_sort(phase->effects);
813 // Figure out if we need mipmaps or not, and if so, tell the inputs that.
814 // (RTT inputs have different logic, which is checked in execute_phase().)
815 for (unsigned i = 0; i < phase->effects.size(); ++i) {
816 Node *node = phase->effects[i];
817 if (node->effect->num_inputs() == 0) {
818 Input *input = static_cast<Input *>(node->effect);
819 assert(node->needs_mipmaps != Effect::NEEDS_MIPMAPS || input->can_supply_mipmaps());
820 CHECK(input->set_int("needs_mipmaps", node->needs_mipmaps == Effect::NEEDS_MIPMAPS));
824 // Tell each node which phase it ended up in, so that the unit test
825 // can check that the phases were split in the right place.
826 // Note that this ignores that effects may be part of multiple phases;
827 // if the unit tests need to test such cases, we'll reconsider.
828 for (unsigned i = 0; i < phase->effects.size(); ++i) {
829 phase->effects[i]->containing_phase = phase;
832 // Actually make the shader for this phase.
833 compile_glsl_program(phase);
835 // Initialize timers.
836 if (movit_timer_queries_supported) {
837 phase->time_elapsed_ns = 0;
838 phase->num_measured_iterations = 0;
// Publish the finished phase: remember it for this output node and append it
// to the chain's list of phases.
841 assert(completed_effects->count(output) == 0);
842 completed_effects->insert(make_pair(output, phase));
843 phases.push_back(phase);
// Writes the effect graph to `filename` in Graphviz DOT syntax: one DOT node
// per chain node (colored by the phase it belongs to), one edge per outgoing
// link, plus an edge to a synthetic "output" node for enabled nodes without
// outgoing links.
// NOTE(review): the statement guarded by the debug-level check, the handling
// of a failed fopen(), and the closing of the file are not visible here.
847 void EffectChain::output_dot(const char *filename)
849 if (movit_debug_level != MOVIT_DEBUG_ON) {
853 FILE *fp = fopen(filename, "w");
859 fprintf(fp, "digraph G {\n");
860 fprintf(fp, " output [shape=box label=\"(output)\"];\n");
861 for (unsigned i = 0; i < nodes.size(); ++i) {
862 // Find out which phase this event belongs to.
863 vector<int> in_phases;
864 for (unsigned j = 0; j < phases.size(); ++j) {
865 const Phase* p = phases[j];
866 if (find(p->effects.begin(), p->effects.end(), nodes[i]) != p->effects.end()) {
867 in_phases.push_back(j);
// DOT node names are "n" followed by the node pointer cast to long. The fill
// color index is (phase index % 8) + 1 within the accent8 color scheme.
871 if (in_phases.empty()) {
872 fprintf(fp, " n%ld [label=\"%s\"];\n", (long)nodes[i], nodes[i]->effect->effect_type_id().c_str());
873 } else if (in_phases.size() == 1) {
874 fprintf(fp, " n%ld [label=\"%s\" style=\"filled\" fillcolor=\"/accent8/%d\"];\n",
875 (long)nodes[i], nodes[i]->effect->effect_type_id().c_str(),
876 (in_phases[0] % 8) + 1);
878 // If we had new enough Graphviz, style="wedged" would probably be ideal here.
// Nodes in several phases are colored after the first phase found and tagged
// in the label.
880 fprintf(fp, " n%ld [label=\"%s [in multiple phases]\" style=\"filled\" fillcolor=\"/accent8/%d\"];\n",
881 (long)nodes[i], nodes[i]->effect->effect_type_id().c_str(),
882 (in_phases[0] % 8) + 1);
885 char from_node_id[256];
886 snprintf(from_node_id, 256, "n%ld", (long)nodes[i]);
// One edge per outgoing link, labeled by get_labels_for_edge().
888 for (unsigned j = 0; j < nodes[i]->outgoing_links.size(); ++j) {
889 char to_node_id[256];
890 snprintf(to_node_id, 256, "n%ld", (long)nodes[i]->outgoing_links[j]);
892 vector<string> labels = get_labels_for_edge(nodes[i], nodes[i]->outgoing_links[j]);
893 output_dot_edge(fp, from_node_id, to_node_id, labels);
// Enabled nodes without outgoing links are connected to the "output" box.
896 if (nodes[i]->outgoing_links.empty() && !nodes[i]->disabled) {
898 vector<string> labels = get_labels_for_edge(nodes[i], nullptr);
899 output_dot_edge(fp, from_node_id, "output", labels);
// Builds the list of textual labels for the edge from `from` to `to`, as used
// by output_dot(). `to` may be nullptr (edge to the final output), in which
// case no "needs_bounce" label is produced.
// NOTE(review): several case labels, break statements and the return are on
// lines not visible here; only the visible cases are commented.
907 vector<string> EffectChain::get_labels_for_edge(const Node *from, const Node *to)
909 vector<string> labels;
911 if (to != nullptr && to->effect->needs_texture_bounce()) {
912 labels.push_back("needs_bounce");
914 if (from->effect->changes_output_size()) {
915 labels.push_back("resize");
// Color space of the data leaving `from`.
918 switch (from->output_color_space) {
919 case COLORSPACE_INVALID:
920 labels.push_back("spc[invalid]");
922 case COLORSPACE_REC_601_525:
923 labels.push_back("spc[rec601-525]");
925 case COLORSPACE_REC_601_625:
926 labels.push_back("spc[rec601-625]");
// Gamma curve of the data leaving `from`.
932 switch (from->output_gamma_curve) {
934 labels.push_back("gamma[invalid]");
937 labels.push_back("gamma[sRGB]");
939 case GAMMA_REC_601: // and GAMMA_REC_709
940 labels.push_back("gamma[rec601/709]");
// Alpha type of the data leaving `from`.
946 switch (from->output_alpha_type) {
948 labels.push_back("alpha[invalid]");
951 labels.push_back("alpha[blank]");
953 case ALPHA_POSTMULTIPLIED:
954 labels.push_back("alpha[postmult]");
963 void EffectChain::output_dot_edge(FILE *fp,
964 const string &from_node_id,
965 const string &to_node_id,
966 const vector<string> &labels)
968 if (labels.empty()) {
969 fprintf(fp, " %s -> %s;\n", from_node_id.c_str(), to_node_id.c_str());
971 string label = labels[0];
972 for (unsigned k = 1; k < labels.size(); ++k) {
973 label += ", " + labels[k];
975 fprintf(fp, " %s -> %s [label=\"%s\"];\n", from_node_id.c_str(), to_node_id.c_str(), label.c_str());
979 void EffectChain::size_rectangle_to_fit(unsigned width, unsigned height, unsigned *output_width, unsigned *output_height)
981 unsigned scaled_width, scaled_height;
983 if (float(width) * aspect_denom >= float(height) * aspect_nom) {
984 // Same aspect, or W/H > aspect (image is wider than the frame).
985 // In either case, keep width, and adjust height.
986 scaled_width = width;
987 scaled_height = lrintf(width * aspect_denom / aspect_nom);
989 // W/H < aspect (image is taller than the frame), so keep height,
991 scaled_width = lrintf(height * aspect_nom / aspect_denom);
992 scaled_height = height;
995 // We should be consistently larger or smaller then the existing choice,
996 // since we have the same aspect.
997 assert(!(scaled_width < *output_width && scaled_height > *output_height));
998 assert(!(scaled_height < *output_height && scaled_width > *output_width));
1000 if (scaled_width >= *output_width && scaled_height >= *output_height) {
1001 *output_width = scaled_width;
1002 *output_height = scaled_height;
1006 // Propagate input texture sizes throughout, and inform effects downstream.
1007 // (Like a lot of other code, we depend on effects being in topological order.)
1008 void EffectChain::inform_input_sizes(Phase *phase)
1010 // All effects that have a defined size (inputs and RTT inputs)
1011 // get that. Reset all others.
1012 for (unsigned i = 0; i < phase->effects.size(); ++i) {
1013 Node *node = phase->effects[i];
1014 if (node->effect->num_inputs() == 0) {
1015 Input *input = static_cast<Input *>(node->effect);
1016 node->output_width = input->get_width();
1017 node->output_height = input->get_height();
1018 assert(node->output_width != 0);
1019 assert(node->output_height != 0);
1021 node->output_width = node->output_height = 0;
1024 for (unsigned i = 0; i < phase->inputs.size(); ++i) {
1025 Phase *input = phase->inputs[i];
1026 input->output_node->output_width = input->virtual_output_width;
1027 input->output_node->output_height = input->virtual_output_height;
1028 assert(input->output_node->output_width != 0);
1029 assert(input->output_node->output_height != 0);
1032 // Now propagate from the inputs towards the end, and inform as we go.
1033 // The rules are simple:
1035 // 1. Don't touch effects that already have given sizes (ie., inputs
1036 // or effects that change the output size).
1037 // 2. If all of your inputs have the same size, that will be your output size.
1038 // 3. Otherwise, your output size is 0x0.
1039 for (unsigned i = 0; i < phase->effects.size(); ++i) {
1040 Node *node = phase->effects[i];
1041 if (node->effect->num_inputs() == 0) {
1044 unsigned this_output_width = 0;
1045 unsigned this_output_height = 0;
1046 for (unsigned j = 0; j < node->incoming_links.size(); ++j) {
1047 Node *input = node->incoming_links[j];
1048 node->effect->inform_input_size(j, input->output_width, input->output_height);
1050 this_output_width = input->output_width;
1051 this_output_height = input->output_height;
1052 } else if (input->output_width != this_output_width || input->output_height != this_output_height) {
1054 this_output_width = 0;
1055 this_output_height = 0;
1058 if (node->effect->changes_output_size()) {
1059 // We cannot call get_output_size() before we've done inform_input_size()
1061 unsigned real_width, real_height;
1062 node->effect->get_output_size(&real_width, &real_height,
1063 &node->output_width, &node->output_height);
1064 assert(node->effect->sets_virtual_output_size() ||
1065 (real_width == node->output_width &&
1066 real_height == node->output_height));
1068 node->output_width = this_output_width;
1069 node->output_height = this_output_height;
1074 // Note: You should call inform_input_sizes() before this, as the last effect's
1075 // desired output size might change based on the inputs.
1076 void EffectChain::find_output_size(Phase *phase)
1078 Node *output_node = phase->is_compute_shader ? phase->compute_shader_node : phase->effects.back();
1080 // If the last effect explicitly sets an output size, use that.
1081 if (output_node->effect->changes_output_size()) {
1082 output_node->effect->get_output_size(&phase->output_width, &phase->output_height,
1083 &phase->virtual_output_width, &phase->virtual_output_height);
1084 assert(output_node->effect->sets_virtual_output_size() ||
1085 (phase->output_width == phase->virtual_output_width &&
1086 phase->output_height == phase->virtual_output_height));
1090 // If all effects have the same size, use that.
1091 unsigned output_width = 0, output_height = 0;
1092 bool all_inputs_same_size = true;
1094 for (unsigned i = 0; i < phase->inputs.size(); ++i) {
1095 Phase *input = phase->inputs[i];
1096 assert(input->output_width != 0);
1097 assert(input->output_height != 0);
1098 if (output_width == 0 && output_height == 0) {
1099 output_width = input->virtual_output_width;
1100 output_height = input->virtual_output_height;
1101 } else if (output_width != input->virtual_output_width ||
1102 output_height != input->virtual_output_height) {
1103 all_inputs_same_size = false;
1106 for (unsigned i = 0; i < phase->effects.size(); ++i) {
1107 Effect *effect = phase->effects[i]->effect;
1108 if (effect->num_inputs() != 0) {
1112 Input *input = static_cast<Input *>(effect);
1113 if (output_width == 0 && output_height == 0) {
1114 output_width = input->get_width();
1115 output_height = input->get_height();
1116 } else if (output_width != input->get_width() ||
1117 output_height != input->get_height()) {
1118 all_inputs_same_size = false;
1122 if (all_inputs_same_size) {
1123 assert(output_width != 0);
1124 assert(output_height != 0);
1125 phase->virtual_output_width = phase->output_width = output_width;
1126 phase->virtual_output_height = phase->output_height = output_height;
1130 // If not, fit all the inputs into the current aspect, and select the largest one.
1133 for (unsigned i = 0; i < phase->inputs.size(); ++i) {
1134 Phase *input = phase->inputs[i];
1135 assert(input->output_width != 0);
1136 assert(input->output_height != 0);
1137 size_rectangle_to_fit(input->output_width, input->output_height, &output_width, &output_height);
1139 for (unsigned i = 0; i < phase->effects.size(); ++i) {
1140 Effect *effect = phase->effects[i]->effect;
1141 if (effect->num_inputs() != 0) {
1145 Input *input = static_cast<Input *>(effect);
1146 size_rectangle_to_fit(input->get_width(), input->get_height(), &output_width, &output_height);
1148 assert(output_width != 0);
1149 assert(output_height != 0);
1150 phase->virtual_output_width = phase->output_width = output_width;
1151 phase->virtual_output_height = phase->output_height = output_height;
1154 void EffectChain::sort_all_nodes_topologically()
1156 nodes = topological_sort(nodes);
1159 vector<Node *> EffectChain::topological_sort(const vector<Node *> &nodes)
1161 set<Node *> nodes_left_to_visit(nodes.begin(), nodes.end());
1162 vector<Node *> sorted_list;
1163 for (unsigned i = 0; i < nodes.size(); ++i) {
1164 topological_sort_visit_node(nodes[i], &nodes_left_to_visit, &sorted_list);
1166 reverse(sorted_list.begin(), sorted_list.end());
1170 void EffectChain::topological_sort_visit_node(Node *node, set<Node *> *nodes_left_to_visit, vector<Node *> *sorted_list)
1172 if (nodes_left_to_visit->count(node) == 0) {
1175 nodes_left_to_visit->erase(node);
1176 for (unsigned i = 0; i < node->outgoing_links.size(); ++i) {
1177 topological_sort_visit_node(node->outgoing_links[i], nodes_left_to_visit, sorted_list);
1179 sorted_list->push_back(node);
1182 void EffectChain::find_color_spaces_for_inputs()
1184 for (unsigned i = 0; i < nodes.size(); ++i) {
1185 Node *node = nodes[i];
1186 if (node->disabled) {
1189 if (node->incoming_links.size() == 0) {
1190 Input *input = static_cast<Input *>(node->effect);
1191 node->output_color_space = input->get_color_space();
1192 node->output_gamma_curve = input->get_gamma_curve();
1194 Effect::AlphaHandling alpha_handling = input->alpha_handling();
1195 switch (alpha_handling) {
1196 case Effect::OUTPUT_BLANK_ALPHA:
1197 node->output_alpha_type = ALPHA_BLANK;
1199 case Effect::INPUT_AND_OUTPUT_PREMULTIPLIED_ALPHA:
1200 node->output_alpha_type = ALPHA_PREMULTIPLIED;
1202 case Effect::OUTPUT_POSTMULTIPLIED_ALPHA:
1203 node->output_alpha_type = ALPHA_POSTMULTIPLIED;
1205 case Effect::INPUT_PREMULTIPLIED_ALPHA_KEEP_BLANK:
1206 case Effect::DONT_CARE_ALPHA_TYPE:
1211 if (node->output_alpha_type == ALPHA_PREMULTIPLIED) {
1212 assert(node->output_gamma_curve == GAMMA_LINEAR);
1218 // Propagate gamma and color space information as far as we can in the graph.
1219 // The rules are simple: Anything where all the inputs agree, get that as
1220 // output as well. Anything else keeps having *_INVALID.
1221 void EffectChain::propagate_gamma_and_color_space()
1223 // We depend on going through the nodes in order.
1224 sort_all_nodes_topologically();
1226 for (unsigned i = 0; i < nodes.size(); ++i) {
1227 Node *node = nodes[i];
1228 if (node->disabled) {
1231 assert(node->incoming_links.size() == node->effect->num_inputs());
1232 if (node->incoming_links.size() == 0) {
1233 assert(node->output_color_space != COLORSPACE_INVALID);
1234 assert(node->output_gamma_curve != GAMMA_INVALID);
1238 Colorspace color_space = node->incoming_links[0]->output_color_space;
1239 GammaCurve gamma_curve = node->incoming_links[0]->output_gamma_curve;
1240 for (unsigned j = 1; j < node->incoming_links.size(); ++j) {
1241 if (node->incoming_links[j]->output_color_space != color_space) {
1242 color_space = COLORSPACE_INVALID;
1244 if (node->incoming_links[j]->output_gamma_curve != gamma_curve) {
1245 gamma_curve = GAMMA_INVALID;
1249 // The conversion effects already have their outputs set correctly,
1250 // so leave them alone.
1251 if (node->effect->effect_type_id() != "ColorspaceConversionEffect") {
1252 node->output_color_space = color_space;
1254 if (node->effect->effect_type_id() != "GammaCompressionEffect" &&
1255 node->effect->effect_type_id() != "GammaExpansionEffect") {
1256 node->output_gamma_curve = gamma_curve;
1261 // Propagate alpha information as far as we can in the graph.
1262 // Similar to propagate_gamma_and_color_space().
1263 void EffectChain::propagate_alpha()
1265 // We depend on going through the nodes in order.
1266 sort_all_nodes_topologically();
1268 for (unsigned i = 0; i < nodes.size(); ++i) {
1269 Node *node = nodes[i];
1270 if (node->disabled) {
1273 assert(node->incoming_links.size() == node->effect->num_inputs());
1274 if (node->incoming_links.size() == 0) {
1275 assert(node->output_alpha_type != ALPHA_INVALID);
1279 // The alpha multiplication/division effects are special cases.
1280 if (node->effect->effect_type_id() == "AlphaMultiplicationEffect") {
1281 assert(node->incoming_links.size() == 1);
1282 assert(node->incoming_links[0]->output_alpha_type == ALPHA_POSTMULTIPLIED);
1283 node->output_alpha_type = ALPHA_PREMULTIPLIED;
1286 if (node->effect->effect_type_id() == "AlphaDivisionEffect") {
1287 assert(node->incoming_links.size() == 1);
1288 assert(node->incoming_links[0]->output_alpha_type == ALPHA_PREMULTIPLIED);
1289 node->output_alpha_type = ALPHA_POSTMULTIPLIED;
1293 // GammaCompressionEffect and GammaExpansionEffect are also a special case,
1294 // because they are the only one that _need_ postmultiplied alpha.
1295 if (node->effect->effect_type_id() == "GammaCompressionEffect" ||
1296 node->effect->effect_type_id() == "GammaExpansionEffect") {
1297 assert(node->incoming_links.size() == 1);
1298 if (node->incoming_links[0]->output_alpha_type == ALPHA_BLANK) {
1299 node->output_alpha_type = ALPHA_BLANK;
1300 } else if (node->incoming_links[0]->output_alpha_type == ALPHA_POSTMULTIPLIED) {
1301 node->output_alpha_type = ALPHA_POSTMULTIPLIED;
1303 node->output_alpha_type = ALPHA_INVALID;
1308 // Only inputs can have unconditional alpha output (OUTPUT_BLANK_ALPHA
1309 // or OUTPUT_POSTMULTIPLIED_ALPHA), and they have already been
1310 // taken care of above. Rationale: Even if you could imagine
1311 // e.g. an effect that took in an image and set alpha=1.0
1312 // unconditionally, it wouldn't make any sense to have it as
1313 // e.g. OUTPUT_BLANK_ALPHA, since it wouldn't know whether it
1314 // got its input pre- or postmultiplied, so it wouldn't know
1315 // whether to divide away the old alpha or not.
1316 Effect::AlphaHandling alpha_handling = node->effect->alpha_handling();
1317 assert(alpha_handling == Effect::INPUT_AND_OUTPUT_PREMULTIPLIED_ALPHA ||
1318 alpha_handling == Effect::INPUT_PREMULTIPLIED_ALPHA_KEEP_BLANK ||
1319 alpha_handling == Effect::DONT_CARE_ALPHA_TYPE);
1321 // If the node has multiple inputs, check that they are all valid and
1323 bool any_invalid = false;
1324 bool any_premultiplied = false;
1325 bool any_postmultiplied = false;
1327 for (unsigned j = 0; j < node->incoming_links.size(); ++j) {
1328 switch (node->incoming_links[j]->output_alpha_type) {
1333 // Blank is good as both pre- and postmultiplied alpha,
1334 // so just ignore it.
1336 case ALPHA_PREMULTIPLIED:
1337 any_premultiplied = true;
1339 case ALPHA_POSTMULTIPLIED:
1340 any_postmultiplied = true;
1348 node->output_alpha_type = ALPHA_INVALID;
1352 // Inputs must be of the same type.
1353 if (any_premultiplied && any_postmultiplied) {
1354 node->output_alpha_type = ALPHA_INVALID;
1358 if (alpha_handling == Effect::INPUT_AND_OUTPUT_PREMULTIPLIED_ALPHA ||
1359 alpha_handling == Effect::INPUT_PREMULTIPLIED_ALPHA_KEEP_BLANK) {
1360 // This combination (requiring premultiplied alpha, but _not_ requiring
1361 // linear light) is illegal, since the combination of premultiplied alpha
1362 // and nonlinear inputs is meaningless.
1363 assert(node->effect->needs_linear_light());
1365 // If the effect has asked for premultiplied alpha, check that it has got it.
1366 if (any_postmultiplied) {
1367 node->output_alpha_type = ALPHA_INVALID;
1368 } else if (!any_premultiplied &&
1369 alpha_handling == Effect::INPUT_PREMULTIPLIED_ALPHA_KEEP_BLANK) {
1370 // Blank input alpha, and the effect preserves blank alpha.
1371 node->output_alpha_type = ALPHA_BLANK;
1373 node->output_alpha_type = ALPHA_PREMULTIPLIED;
1376 // OK, all inputs are the same, and this effect is not going
1378 assert(alpha_handling == Effect::DONT_CARE_ALPHA_TYPE);
1379 if (any_premultiplied) {
1380 node->output_alpha_type = ALPHA_PREMULTIPLIED;
1381 } else if (any_postmultiplied) {
1382 node->output_alpha_type = ALPHA_POSTMULTIPLIED;
1384 node->output_alpha_type = ALPHA_BLANK;
1390 bool EffectChain::node_needs_colorspace_fix(Node *node)
1392 if (node->disabled) {
1395 if (node->effect->num_inputs() == 0) {
1399 // propagate_gamma_and_color_space() has already set our output
1400 // to COLORSPACE_INVALID if the inputs differ, so we can rely on that.
1401 if (node->output_color_space == COLORSPACE_INVALID) {
1404 return (node->effect->needs_srgb_primaries() && node->output_color_space != COLORSPACE_sRGB);
1407 // Fix up color spaces so that there are no COLORSPACE_INVALID nodes left in
1408 // the graph. Our strategy is not always optimal, but quite simple:
1409 // Find an effect that's as early as possible where the inputs are of
1410 // unacceptable colorspaces (that is, either different, or, if the effect only
1411 // wants sRGB, not sRGB.) Add appropriate conversions on all its inputs,
1412 // propagate the information anew, and repeat until there are no more such
1414 void EffectChain::fix_internal_color_spaces()
1416 unsigned colorspace_propagation_pass = 0;
1420 for (unsigned i = 0; i < nodes.size(); ++i) {
1421 Node *node = nodes[i];
1422 if (!node_needs_colorspace_fix(node)) {
1426 // Go through each input that is not sRGB, and insert
1427 // a colorspace conversion after it.
1428 for (unsigned j = 0; j < node->incoming_links.size(); ++j) {
1429 Node *input = node->incoming_links[j];
1430 assert(input->output_color_space != COLORSPACE_INVALID);
1431 if (input->output_color_space == COLORSPACE_sRGB) {
1434 Node *conversion = add_node(new ColorspaceConversionEffect());
1435 CHECK(conversion->effect->set_int("source_space", input->output_color_space));
1436 CHECK(conversion->effect->set_int("destination_space", COLORSPACE_sRGB));
1437 conversion->output_color_space = COLORSPACE_sRGB;
1438 replace_sender(input, conversion);
1439 connect_nodes(input, conversion);
1442 // Re-sort topologically, and propagate the new information.
1443 propagate_gamma_and_color_space();
1450 sprintf(filename, "step5-colorspacefix-iter%u.dot", ++colorspace_propagation_pass);
1451 output_dot(filename);
1452 assert(colorspace_propagation_pass < 100);
1453 } while (found_any);
1455 for (unsigned i = 0; i < nodes.size(); ++i) {
1456 Node *node = nodes[i];
1457 if (node->disabled) {
1460 assert(node->output_color_space != COLORSPACE_INVALID);
1464 bool EffectChain::node_needs_alpha_fix(Node *node)
1466 if (node->disabled) {
1470 // propagate_alpha() has already set our output to ALPHA_INVALID if the
1471 // inputs differ or we are otherwise in mismatch, so we can rely on that.
1472 return (node->output_alpha_type == ALPHA_INVALID);
1475 // Fix up alpha so that there are no ALPHA_INVALID nodes left in
1476 // the graph. Similar to fix_internal_color_spaces().
1477 void EffectChain::fix_internal_alpha(unsigned step)
1479 unsigned alpha_propagation_pass = 0;
1483 for (unsigned i = 0; i < nodes.size(); ++i) {
1484 Node *node = nodes[i];
1485 if (!node_needs_alpha_fix(node)) {
1489 // If we need to fix up GammaExpansionEffect, then clearly something
1490 // is wrong, since the combination of premultiplied alpha and nonlinear inputs
1492 assert(node->effect->effect_type_id() != "GammaExpansionEffect");
1494 AlphaType desired_type = ALPHA_PREMULTIPLIED;
1496 // GammaCompressionEffect is special; it needs postmultiplied alpha.
1497 if (node->effect->effect_type_id() == "GammaCompressionEffect") {
1498 assert(node->incoming_links.size() == 1);
1499 assert(node->incoming_links[0]->output_alpha_type == ALPHA_PREMULTIPLIED);
1500 desired_type = ALPHA_POSTMULTIPLIED;
1503 // Go through each input that is not premultiplied alpha, and insert
1504 // a conversion before it.
1505 for (unsigned j = 0; j < node->incoming_links.size(); ++j) {
1506 Node *input = node->incoming_links[j];
1507 assert(input->output_alpha_type != ALPHA_INVALID);
1508 if (input->output_alpha_type == desired_type ||
1509 input->output_alpha_type == ALPHA_BLANK) {
1513 if (desired_type == ALPHA_PREMULTIPLIED) {
1514 conversion = add_node(new AlphaMultiplicationEffect());
1516 conversion = add_node(new AlphaDivisionEffect());
1518 conversion->output_alpha_type = desired_type;
1519 replace_sender(input, conversion);
1520 connect_nodes(input, conversion);
1523 // Re-sort topologically, and propagate the new information.
1524 propagate_gamma_and_color_space();
1532 sprintf(filename, "step%u-alphafix-iter%u.dot", step, ++alpha_propagation_pass);
1533 output_dot(filename);
1534 assert(alpha_propagation_pass < 100);
1535 } while (found_any);
1537 for (unsigned i = 0; i < nodes.size(); ++i) {
1538 Node *node = nodes[i];
1539 if (node->disabled) {
1542 assert(node->output_alpha_type != ALPHA_INVALID);
1546 // Make so that the output is in the desired color space.
1547 void EffectChain::fix_output_color_space()
1549 Node *output = find_output_node();
1550 if (output->output_color_space != output_format.color_space) {
1551 Node *conversion = add_node(new ColorspaceConversionEffect());
1552 CHECK(conversion->effect->set_int("source_space", output->output_color_space));
1553 CHECK(conversion->effect->set_int("destination_space", output_format.color_space));
1554 conversion->output_color_space = output_format.color_space;
1555 connect_nodes(output, conversion);
1557 propagate_gamma_and_color_space();
1561 // Make so that the output is in the desired pre-/postmultiplication alpha state.
1562 void EffectChain::fix_output_alpha()
1564 Node *output = find_output_node();
1565 assert(output->output_alpha_type != ALPHA_INVALID);
1566 if (output->output_alpha_type == ALPHA_BLANK) {
1567 // No alpha output, so we don't care.
1570 if (output->output_alpha_type == ALPHA_PREMULTIPLIED &&
1571 output_alpha_format == OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED) {
1572 Node *conversion = add_node(new AlphaDivisionEffect());
1573 connect_nodes(output, conversion);
1575 propagate_gamma_and_color_space();
1577 if (output->output_alpha_type == ALPHA_POSTMULTIPLIED &&
1578 output_alpha_format == OUTPUT_ALPHA_FORMAT_PREMULTIPLIED) {
1579 Node *conversion = add_node(new AlphaMultiplicationEffect());
1580 connect_nodes(output, conversion);
1582 propagate_gamma_and_color_space();
1586 bool EffectChain::node_needs_gamma_fix(Node *node)
1588 if (node->disabled) {
1592 // Small hack since the output is not an explicit node:
1593 // If we are the last node and our output is in the wrong
1594 // space compared to EffectChain's output, we need to fix it.
1595 // This will only take us to linear, but fix_output_gamma()
1596 // will come and take us to the desired output gamma
1599 // This needs to be before everything else, since it could
1600 // even apply to inputs (if they are the only effect).
1601 if (node->outgoing_links.empty() &&
1602 node->output_gamma_curve != output_format.gamma_curve &&
1603 node->output_gamma_curve != GAMMA_LINEAR) {
1607 if (node->effect->num_inputs() == 0) {
1611 // propagate_gamma_and_color_space() has already set our output
1612 // to GAMMA_INVALID if the inputs differ, so we can rely on that,
1613 // except for GammaCompressionEffect.
1614 if (node->output_gamma_curve == GAMMA_INVALID) {
1617 if (node->effect->effect_type_id() == "GammaCompressionEffect") {
1618 assert(node->incoming_links.size() == 1);
1619 return node->incoming_links[0]->output_gamma_curve != GAMMA_LINEAR;
1622 return (node->effect->needs_linear_light() && node->output_gamma_curve != GAMMA_LINEAR);
1625 // Very similar to fix_internal_color_spaces(), but for gamma.
1626 // There is one difference, though; before we start adding conversion nodes,
1627 // we see if we can get anything out of asking the sources to deliver
1628 // linear gamma directly. fix_internal_gamma_by_asking_inputs()
1629 // does that part, while fix_internal_gamma_by_inserting_nodes()
1630 // inserts nodes as needed afterwards.
1631 void EffectChain::fix_internal_gamma_by_asking_inputs(unsigned step)
1633 unsigned gamma_propagation_pass = 0;
1637 for (unsigned i = 0; i < nodes.size(); ++i) {
1638 Node *node = nodes[i];
1639 if (!node_needs_gamma_fix(node)) {
1643 // See if all inputs can give us linear gamma. If not, leave it.
1644 vector<Node *> nonlinear_inputs;
1645 find_all_nonlinear_inputs(node, &nonlinear_inputs);
1646 assert(!nonlinear_inputs.empty());
1649 for (unsigned i = 0; i < nonlinear_inputs.size(); ++i) {
1650 Input *input = static_cast<Input *>(nonlinear_inputs[i]->effect);
1651 all_ok &= input->can_output_linear_gamma();
1658 for (unsigned i = 0; i < nonlinear_inputs.size(); ++i) {
1659 CHECK(nonlinear_inputs[i]->effect->set_int("output_linear_gamma", 1));
1660 nonlinear_inputs[i]->output_gamma_curve = GAMMA_LINEAR;
1663 // Re-sort topologically, and propagate the new information.
1664 propagate_gamma_and_color_space();
1671 sprintf(filename, "step%u-gammafix-iter%u.dot", step, ++gamma_propagation_pass);
1672 output_dot(filename);
1673 assert(gamma_propagation_pass < 100);
1674 } while (found_any);
1677 void EffectChain::fix_internal_gamma_by_inserting_nodes(unsigned step)
1679 unsigned gamma_propagation_pass = 0;
1683 for (unsigned i = 0; i < nodes.size(); ++i) {
1684 Node *node = nodes[i];
1685 if (!node_needs_gamma_fix(node)) {
1689 // Special case: We could be an input and still be asked to
1690 // fix our gamma; if so, we should be the only node
1691 // (as node_needs_gamma_fix() would only return true in
1692 // for an input in that case). That means we should insert
1693 // a conversion node _after_ ourselves.
1694 if (node->incoming_links.empty()) {
1695 assert(node->outgoing_links.empty());
1696 Node *conversion = add_node(new GammaExpansionEffect());
1697 CHECK(conversion->effect->set_int("source_curve", node->output_gamma_curve));
1698 conversion->output_gamma_curve = GAMMA_LINEAR;
1699 connect_nodes(node, conversion);
1702 // If not, go through each input that is not linear gamma,
1703 // and insert a gamma conversion after it.
1704 for (unsigned j = 0; j < node->incoming_links.size(); ++j) {
1705 Node *input = node->incoming_links[j];
1706 assert(input->output_gamma_curve != GAMMA_INVALID);
1707 if (input->output_gamma_curve == GAMMA_LINEAR) {
1710 Node *conversion = add_node(new GammaExpansionEffect());
1711 CHECK(conversion->effect->set_int("source_curve", input->output_gamma_curve));
1712 conversion->output_gamma_curve = GAMMA_LINEAR;
1713 replace_sender(input, conversion);
1714 connect_nodes(input, conversion);
1717 // Re-sort topologically, and propagate the new information.
1719 propagate_gamma_and_color_space();
1726 sprintf(filename, "step%u-gammafix-iter%u.dot", step, ++gamma_propagation_pass);
1727 output_dot(filename);
1728 assert(gamma_propagation_pass < 100);
1729 } while (found_any);
1731 for (unsigned i = 0; i < nodes.size(); ++i) {
1732 Node *node = nodes[i];
1733 if (node->disabled) {
1736 assert(node->output_gamma_curve != GAMMA_INVALID);
1740 // Make so that the output is in the desired gamma.
1741 // Note that this assumes linear input gamma, so it might create the need
1742 // for another pass of fix_internal_gamma().
1743 void EffectChain::fix_output_gamma()
1745 Node *output = find_output_node();
1746 if (output->output_gamma_curve != output_format.gamma_curve) {
1747 Node *conversion = add_node(new GammaCompressionEffect());
1748 CHECK(conversion->effect->set_int("destination_curve", output_format.gamma_curve));
1749 conversion->output_gamma_curve = output_format.gamma_curve;
1750 connect_nodes(output, conversion);
1754 // If the user has requested Y'CbCr output, we need to do this conversion
1755 // _after_ GammaCompressionEffect etc., but before dither (see below).
1756 // This is because Y'CbCr, with the exception of a special optional mode
1757 // in Rec. 2020 (which we currently don't support), is defined to work on
1758 // gamma-encoded data.
1759 void EffectChain::add_ycbcr_conversion_if_needed()
1761 assert(output_color_rgba || num_output_color_ycbcr > 0);
1762 if (num_output_color_ycbcr == 0) {
1765 Node *output = find_output_node();
1766 ycbcr_conversion_effect_node = add_node(new YCbCrConversionEffect(output_ycbcr_format, output_ycbcr_type));
1767 connect_nodes(output, ycbcr_conversion_effect_node);
1770 // If the user has requested dither, add a DitherEffect right at the end
1771 // (after GammaCompressionEffect etc.). This needs to be done after everything else,
1772 // since dither is about the only effect that can _not_ be done in linear space.
1773 void EffectChain::add_dither_if_needed()
1775 if (num_dither_bits == 0) {
1778 Node *output = find_output_node();
1779 Node *dither = add_node(new DitherEffect());
1780 CHECK(dither->effect->set_int("num_bits", num_dither_bits));
1781 connect_nodes(output, dither);
1783 dither_effect = dither->effect;
1786 // Compute shaders can't output to the framebuffer, so if the last
1787 // phase ends in a compute shader, add a dummy phase at the end that
1788 // only blits directly from the temporary texture.
1790 // TODO: Add an API for rendering directly to textures, for the cases
1791 // where we're only rendering to an FBO anyway.
1792 void EffectChain::add_dummy_effect_if_needed()
1794 Node *output = find_output_node();
1796 // See if the last effect that's not strong one-to-one is a compute shader.
1797 Node *last_effect = output;
1798 while (last_effect->effect->num_inputs() == 1 &&
1799 last_effect->effect->strong_one_to_one_sampling()) {
1800 last_effect = last_effect->incoming_links[0];
1802 if (last_effect->effect->is_compute_shader()) {
1803 Node *dummy = add_node(new ComputeShaderOutputDisplayEffect());
1804 connect_nodes(output, dummy);
1805 has_dummy_effect = true;
1809 // Find the output node. This is, simply, one that has no outgoing links.
1810 // If there are multiple ones, the graph is malformed (we do not support
1811 // multiple outputs right now).
1812 Node *EffectChain::find_output_node()
1814 vector<Node *> output_nodes;
1815 for (unsigned i = 0; i < nodes.size(); ++i) {
1816 Node *node = nodes[i];
1817 if (node->disabled) {
1820 if (node->outgoing_links.empty()) {
1821 output_nodes.push_back(node);
1824 assert(output_nodes.size() == 1);
1825 return output_nodes[0];
1828 void EffectChain::finalize()
1830 // Output the graph as it is before we do any conversions on it.
1831 output_dot("step0-start.dot");
1833 // Give each effect in turn a chance to rewrite its own part of the graph.
1834 // Note that if more effects are added as part of this, they will be
1835 // picked up as part of the same for loop, since they are added at the end.
1836 for (unsigned i = 0; i < nodes.size(); ++i) {
1837 nodes[i]->effect->rewrite_graph(this, nodes[i]);
1839 output_dot("step1-rewritten.dot");
1841 find_color_spaces_for_inputs();
1842 output_dot("step2-input-colorspace.dot");
1845 output_dot("step3-propagated-alpha.dot");
1847 propagate_gamma_and_color_space();
1848 output_dot("step4-propagated-all.dot");
1850 fix_internal_color_spaces();
1851 fix_internal_alpha(6);
1852 fix_output_color_space();
1853 output_dot("step7-output-colorspacefix.dot");
1855 output_dot("step8-output-alphafix.dot");
1857 // Note that we need to fix gamma after colorspace conversion,
1858 // because colorspace conversions might create needs for gamma conversions.
1859 // Also, we need to run an extra pass of fix_internal_gamma() after
1860 // fixing the output gamma, as we only have conversions to/from linear,
1861 // and fix_internal_alpha() since GammaCompressionEffect needs
1862 // postmultiplied input.
1863 fix_internal_gamma_by_asking_inputs(9);
1864 fix_internal_gamma_by_inserting_nodes(10);
1866 output_dot("step11-output-gammafix.dot");
1868 output_dot("step12-output-alpha-propagated.dot");
1869 fix_internal_alpha(13);
1870 output_dot("step14-output-alpha-fixed.dot");
1871 fix_internal_gamma_by_asking_inputs(15);
1872 fix_internal_gamma_by_inserting_nodes(16);
1874 output_dot("step17-before-ycbcr.dot");
1875 add_ycbcr_conversion_if_needed();
1877 output_dot("step18-before-dither.dot");
1878 add_dither_if_needed();
1880 output_dot("step19-before-dummy-effect.dot");
1881 add_dummy_effect_if_needed();
1883 output_dot("step20-final.dot");
1885 // Construct all needed GLSL programs, starting at the output.
1886 // We need to keep track of which effects have already been computed,
1887 // as an effect with multiple users could otherwise be calculated
1889 map<Node *, Phase *> completed_effects;
1890 construct_phase(find_output_node(), &completed_effects);
1892 output_dot("step21-split-to-phases.dot");
1894 assert(phases[0]->inputs.empty());
// Renders the chain by forwarding to render() with an empty destination
// list, so the output is directed at dest_fbo rather than at textures.
// If both width and height are zero, the size is read from the current
// GL viewport instead.
// NOTE(review): this listing skips some source lines (e.g. 1905, 1907-1908);
// presumably they declare `viewport` and copy its origin into x/y -- confirm.
1899 void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height)
1901 // Save original viewport.
1902 GLuint x = 0, y = 0;
// A 0x0 request means "use whatever the GL viewport currently is".
1904 if (width == 0 && height == 0) {
1906 glGetIntegerv(GL_VIEWPORT, viewport);
// GL_VIEWPORT yields (x, y, width, height); indices 2 and 3 are the size.
1909 width = viewport[2];
1910 height = viewport[3];
// {} is an empty destination-texture list.
1913 render(dest_fbo, {}, x, y, width, height);
// Renders the chain into the textures listed in `destinations` (asserted
// non-empty) at the given width and height.
// When the chain has no dummy effect, the textures are wrapped in a
// temporary FBO from the resource pool and rendered to like any other FBO.
1916 void EffectChain::render_to_texture(const vector<DestinationTexture> &destinations, unsigned width, unsigned height)
1919 assert(!destinations.empty());
1921 if (!has_dummy_effect) {
1922 // We don't end in a compute shader, so there's nothing specific for us to do.
1923 // Create an FBO for this set of textures, and just render to that.
1924 GLuint texnums[4] = { 0, 0, 0, 0 };
// At most the first four destinations are used; unused slots stay 0.
1925 for (unsigned i = 0; i < destinations.size() && i < 4; ++i) {
1926 texnums[i] = destinations[i].texnum;
// The FBO is only held for the duration of this one render() call.
1928 GLuint dest_fbo = resource_pool->create_fbo(texnums[0], texnums[1], texnums[2], texnums[3]);
1929 render(dest_fbo, {}, 0, 0, width, height);
1930 resource_pool->release_fbo(dest_fbo);
// NOTE(review): presumably the else branch (the `else` line is not shown in
// this listing): pass the textures themselves to render(), with (GLuint)-1
// in place of an FBO. render() asserts destinations.size() <= 1.
1932 render((GLuint)-1, destinations, 0, 0, width, height);
// Runs the phases in `phases` in order.
//
//   dest_fbo      FBO bound for the last phase when that phase is not a
//                 compute shader; must not be (GLuint)-1 when `destinations`
//                 is empty (asserted).
//   destinations  Output textures; at most one is supported (asserted). When
//                 non-empty, the chain must end in the dummy display phase
//                 (asserted), which is skipped.
//   x, y, width, height
//                 Viewport used for the last phase when it is not a compute
//                 shader; width/height are also handed to the dither effect.
//
// Each phase that other phases read from gets a texture from the resource
// pool, which is released once its last consumer has run.
// NOTE(review): this listing skips a number of source lines inside this
// function (braces, else lines and short statements); the comments below
// only describe what the visible lines establish.
1936 void EffectChain::render(GLuint dest_fbo, const vector<DestinationTexture> &destinations, unsigned x, unsigned y, unsigned width, unsigned height)
1939 assert(destinations.size() <= 1);
1941 // This needs to be set anew, in case we are coming from a different context
1942 // from when we initialized.
1944 glDisable(GL_DITHER);
// Remember whether the caller had sRGB framebuffer writes enabled, so the
// last phase can run with the caller's setting.
1947 const bool final_srgb = glIsEnabled(GL_FRAMEBUFFER_SRGB);
// current_srgb tracks the GL_FRAMEBUFFER_SRGB state as we toggle it below.
1949 bool current_srgb = final_srgb;
1953 glDisable(GL_BLEND);
1955 glDisable(GL_DEPTH_TEST);
1957 glDepthMask(GL_FALSE);
// Phases whose output texture already had mipmaps generated; filled in by
// execute_phase().
1960 set<Phase *> generated_mipmaps;
1962 // We keep one texture per output, but only for as long as we actually have any
1963 // phases that need it as an input. (We don't make any effort to reorder phases
1964 // to minimize the number of textures in play, as register allocation can be
1965 // complicated and we rarely have much to gain, since our graphs are typically
1967 map<Phase *, GLuint> output_textures;
// Number of not-yet-executed consumers of each phase's output.
1968 map<Phase *, int> ref_counts;
1969 for (Phase *phase : phases) {
1970 for (Phase *input : phase->inputs) {
1971 ++ref_counts[input];
1975 size_t num_phases = phases.size();
1976 if (destinations.empty()) {
1977 assert(dest_fbo != (GLuint)-1);
// Rendering to textures: the chain must end in the single-effect dummy
// display phase, preceded by at least one other phase.
1979 assert(has_dummy_effect);
1982 assert(num_phases >= 2);
1983 assert(!phases.back()->is_compute_shader);
1984 assert(phases.back()->effects.size() == 1);
1985 assert(phases.back()->effects[0]->effect->effect_type_id() == "ComputeShaderOutputDisplayEffect");
1987 // We are rendering to a set of textures, so we can run the compute shader
1988 // directly and skip the dummy phase.
1992 for (unsigned phase_num = 0; phase_num < num_phases; ++phase_num) {
1993 Phase *phase = phases[phase_num];
1995 if (do_phase_timing) {
// Take a query object from the phase's free list when one is available;
// otherwise generate a new one. The query is ended after execute_phase().
1996 GLuint timer_query_object;
1997 if (phase->timer_query_objects_free.empty()) {
1998 glGenQueries(1, &timer_query_object);
2000 timer_query_object = phase->timer_query_objects_free.front();
2001 phase->timer_query_objects_free.pop_front();
2003 glBeginQuery(GL_TIME_ELAPSED, timer_query_object);
2004 phase->timer_query_objects_running.push_back(timer_query_object);
2006 bool last_phase = (phase_num == num_phases - 1);
2007 if (last_phase) {
2008 // Last phase goes to the output the user specified.
2009 if (!phase->is_compute_shader) {
2010 glBindFramebuffer(GL_FRAMEBUFFER, dest_fbo);
2012 GLenum status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
2013 assert(status == GL_FRAMEBUFFER_COMPLETE);
2014 glViewport(x, y, width, height);
// The dither effect is told the final output size.
2016 if (dither_effect != nullptr) {
2017 CHECK(dither_effect->set_int("output_width", width));
2018 CHECK(dither_effect->set_int("output_height", height));
2022 // Enable sRGB rendering for intermediates in case we are
2023 // rendering to an sRGB format.
2024 // TODO: Support this for compute shaders.
// Intermediates always want sRGB enabled; the last phase uses the state
// the caller had on entry.
2025 bool needs_srgb = last_phase ? final_srgb : true;
2026 if (needs_srgb && !current_srgb) {
2027 glEnable(GL_FRAMEBUFFER_SRGB);
2029 current_srgb = true;
2030 } else if (!needs_srgb && current_srgb) {
2031 glDisable(GL_FRAMEBUFFER_SRGB);
// sRGB writes are now off, so the tracked state must say so.
2033 current_srgb = false;
2036 // Find a texture for this phase.
2037 inform_input_sizes(phase);
2038 find_output_size(phase);
2039 vector<DestinationTexture> phase_destinations;
// NOTE(review): the condition guarding the next lines is not shown in this
// listing; presumably it is the non-last-phase case -- confirm.
// The phase renders into a pooled texture of the intermediate format, which
// is recorded so that later phases can read it.
2041 GLuint tex_num = resource_pool->create_2d_texture(intermediate_format, phase->output_width, phase->output_height);
2042 output_textures.insert(make_pair(phase, tex_num));
2043 phase_destinations.push_back(DestinationTexture{ tex_num, intermediate_format });
2045 // The output texture needs to have valid state to be written to by a compute shader.
2046 glActiveTexture(GL_TEXTURE0);
2048 glBindTexture(GL_TEXTURE_2D, tex_num);
2050 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
// A compute-shader phase in this branch writes straight to the caller's
// destination texture(s).
2052 } else if (phase->is_compute_shader) {
2053 assert(!destinations.empty());
2054 phase_destinations = destinations;
2057 execute_phase(phase, output_textures, phase_destinations, &generated_mipmaps);
2058 if (do_phase_timing) {
2059 glEndQuery(GL_TIME_ELAPSED);
2062 // Drop any input textures we don't need anymore.
2063 for (Phase *input : phase->inputs) {
2064 assert(ref_counts[input] > 0);
2065 if (--ref_counts[input] == 0) {
2066 resource_pool->release_2d_texture(output_textures[input]);
2067 output_textures.erase(input);
// Release whatever textures are still registered after the last phase.
2072 for (const auto &phase_and_texnum : output_textures) {
2073 resource_pool->release_2d_texture(phase_and_texnum.second);
// Unbind the framebuffer, array buffer and vertex array before returning.
2076 glBindFramebuffer(GL_FRAMEBUFFER, 0);
2081 glBindBuffer(GL_ARRAY_BUFFER, 0);
2083 glBindVertexArray(0);
2086 if (do_phase_timing) {
2087 // Get back the timer queries.
2088 for (unsigned phase_num = 0; phase_num < phases.size(); ++phase_num) {
2089 Phase *phase = phases[phase_num];
2090 for (auto timer_it = phase->timer_query_objects_running.cbegin();
2091 timer_it != phase->timer_query_objects_running.cend(); ) {
// Query object names are GLuint, matching how they are stored and passed.
2092 GLuint timer_query_object = *timer_it;
// NOTE(review): the branch on `available` is not shown in this listing;
// presumably only finished queries are collected below -- confirm.
2094 glGetQueryObjectiv(timer_query_object, GL_QUERY_RESULT_AVAILABLE, &available);
2096 GLuint64 time_elapsed;
2097 glGetQueryObjectui64v(timer_query_object, GL_QUERY_RESULT, &time_elapsed);
2098 phase->time_elapsed_ns += time_elapsed;
2099 ++phase->num_measured_iterations;
// Move the query object back to the free list; the post-increment keeps the
// loop iterator valid across the erase.
2100 phase->timer_query_objects_free.push_back(timer_query_object);
2101 phase->timer_query_objects_running.erase(timer_it++);
// Turns per-phase GPU timing on or off by setting do_phase_timing, which
// render() checks before wrapping each phase in a GL_TIME_ELAPSED query.
2110 void EffectChain::enable_phase_timing(bool enable)
// Timer queries must be supported (asserted).
// NOTE(review): source line 2112 is not shown in this listing; the assert
// may only apply when `enable` is true -- confirm.
2113 assert(movit_timer_queries_supported);
2115 this->do_phase_timing = enable;
// Clears the accumulated timing statistics of every phase: both the summed
// elapsed time and the number of measured iterations are set back to zero.
// The lines shown here do not touch the phases' timer query lists.
2118 void EffectChain::reset_phase_timing()
2120 for (unsigned phase_num = 0; phase_num < phases.size(); ++phase_num) {
2121 Phase *phase = phases[phase_num];
2122 phase->time_elapsed_ns = 0;
2123 phase->num_measured_iterations = 0;
// Prints, for each phase, its average measured time in milliseconds and the
// type ids of the effects it contains, followed by the sum of the averages.
// Output goes to stdout via printf.
2127 void EffectChain::print_phase_timing()
2129 double total_time_ms = 0.0;
2130 for (unsigned phase_num = 0; phase_num < phases.size(); ++phase_num) {
2131 Phase *phase = phases[phase_num];
// Nanoseconds -> milliseconds (1e-6), averaged over the measured iterations.
// A phase with no measurements reports 0.0 instead of dividing by zero,
// which would otherwise turn the total into NaN.
2132 double avg_time_ms = (phase->num_measured_iterations == 0) ? 0.0 : phase->time_elapsed_ns * 1e-6 / phase->num_measured_iterations;
// phase_num is unsigned, hence %u.
2133 printf("Phase %u: %5.1f ms [", phase_num, avg_time_ms);
2134 for (unsigned effect_num = 0; effect_num < phase->effects.size(); ++effect_num) {
// NOTE(review): the body of this condition is not shown in this listing;
// presumably it prints a separator between effect names -- confirm.
2135 if (effect_num != 0) {
2138 printf("%s", phase->effects[effect_num]->effect->effect_type_id().c_str());
2141 total_time_ms += avg_time_ms;
2143 printf("Total: %5.1f ms\n", total_time_ms);
// Executes a single phase: binds the output textures of its input phases to
// texture units, activates the phase's GLSL program, sets up the output,
// hands GL state and uniforms to the effects, and then either dispatches the
// compute shader or draws.
//
//   phase              The phase to run.
//   output_textures    Output texture of each phase that has one; every
//                      input of `phase` must be present (asserted).
//   destinations       Must be non-empty for a compute shader, which writes
//                      to destinations[0] only. For other phases it may be
//                      empty, or hold exactly one texture (asserted) that is
//                      wrapped in an FBO from the resource pool.
//   generated_mipmaps  Phases whose output texture already has mipmaps;
//                      updated when mipmaps are generated here.
//
// NOTE(review): this listing skips several source lines inside this function
// (e.g. the declaration of `fbo`, some else/brace lines); comments below only
// describe what the visible lines establish.
2146 void EffectChain::execute_phase(Phase *phase,
2147 const map<Phase *, GLuint> &output_textures,
2148 const vector<DestinationTexture> &destinations,
2149 set<Phase *> *generated_mipmaps)
2151 // Set up RTT inputs for this phase.
// Input number i is bound to texture unit GL_TEXTURE0 + i.
2152 for (unsigned sampler = 0; sampler < phase->inputs.size(); ++sampler) {
2153 glActiveTexture(GL_TEXTURE0 + sampler);
2154 Phase *input = phase->inputs[sampler];
2155 input->output_node->bound_sampler_num = sampler;
2156 const auto it = output_textures.find(input);
2157 assert(it != output_textures.end());
2158 glBindTexture(GL_TEXTURE_2D, it->second);
2161 // See if anything using this RTT input (in this phase) needs mipmaps.
2162 // TODO: It could be that we get conflicting logic here, if we have
2163 // multiple effects with incompatible mipmaps using the same
2164 // RTT input. However, that is obscure enough that we can deal
2165 // with it at some future point (preferably when we have
2166 // universal support for separate sampler objects!). For now,
2167 // an assert is good enough. See also the TODO at bound_sampler_num.
2168 bool any_needs_mipmaps = false, any_refuses_mipmaps = false;
2169 for (Node *node : phase->effects) {
2170 assert(node->incoming_links.size() == node->incoming_link_type.size());
// Only links that read this input across a phase boundary are considered.
2171 for (size_t i = 0; i < node->incoming_links.size(); ++i) {
2172 if (node->incoming_links[i] == input->output_node &&
2173 node->incoming_link_type[i] == IN_ANOTHER_PHASE) {
2174 if (node->needs_mipmaps == Effect::NEEDS_MIPMAPS) {
2175 any_needs_mipmaps = true;
2176 } else if (node->needs_mipmaps == Effect::CANNOT_ACCEPT_MIPMAPS) {
2177 any_refuses_mipmaps = true;
2182 assert(!(any_needs_mipmaps && any_refuses_mipmaps));
// Generate mipmaps at most once per input phase; the set remembers which
// inputs have already been processed.
2184 if (any_needs_mipmaps && generated_mipmaps->count(input) == 0) {
2185 glGenerateMipmap(GL_TEXTURE_2D);
2187 generated_mipmaps->insert(input);
2189 setup_rtt_sampler(sampler, any_needs_mipmaps);
2190 phase->input_samplers[sampler] = sampler; // Bind the sampler to the right uniform.
// The pool returns an instance number that is handed back in
// unuse_glsl_program() at the end.
2193 GLuint instance_program_num = resource_pool->use_glsl_program(phase->glsl_program_num);
2196 // And now the output.
2198 if (phase->is_compute_shader) {
2199 assert(!destinations.empty());
2201 // This is currently the only place where we use image units,
2202 // so we can always start at 0. TODO: Support multiple destinations.
2203 phase->outbuf_image_unit = 0;
// Bind level 0 of the destination texture write-only, in its own format.
2204 glBindImageTexture(phase->outbuf_image_unit, destinations[0].texnum, 0, GL_FALSE, 0, GL_WRITE_ONLY, destinations[0].format);
// Values derived from the output size: the size itself, its reciprocal, and
// half of that reciprocal (0.5 / size).
2206 phase->uniform_output_size[0] = phase->output_width;
2207 phase->uniform_output_size[1] = phase->output_height;
2208 phase->inv_output_size.x = 1.0f / phase->output_width;
2209 phase->inv_output_size.y = 1.0f / phase->output_height;
2210 phase->output_texcoord_adjust.x = 0.5f / phase->output_width;
2211 phase->output_texcoord_adjust.y = 0.5f / phase->output_height;
// Non-compute phase with a destination texture: render into it through a
// pooled FBO, with the viewport covering the phase's whole output.
2212 } else if (!destinations.empty()) {
2213 assert(destinations.size() == 1);
2214 fbo = resource_pool->create_fbo(destinations[0].texnum);
2215 glBindFramebuffer(GL_FRAMEBUFFER, fbo);
2216 glViewport(0, 0, phase->output_width, phase->output_height);
2219 // Give the required parameters to all the effects.
// Effects start allocating sampler units after the ones taken by RTT inputs.
2220 unsigned sampler_num = phase->inputs.size();
2221 for (unsigned i = 0; i < phase->effects.size(); ++i) {
2222 Node *node = phase->effects[i];
2223 unsigned old_sampler_num = sampler_num;
2224 node->effect->set_gl_state(instance_program_num, phase->effect_ids[make_pair(node, IN_SAME_PHASE)], &sampler_num);
// A single-texture effect must have consumed exactly one unit; remember
// which one. bound_sampler_num is otherwise set to -1.
2227 if (node->effect->is_single_texture()) {
2228 assert(sampler_num - old_sampler_num == 1);
2229 node->bound_sampler_num = old_sampler_num;
2231 node->bound_sampler_num = -1;
2235 if (phase->is_compute_shader) {
// The compute effect supplies the dispatch dimensions for this output size.
2237 phase->compute_shader_node->effect->get_compute_dimensions(phase->output_width, phase->output_height, &x, &y, &z);
2239 // Uniforms need to come after set_gl_state() _and_ get_compute_dimensions(),
2240 // since they can be updated from there.
2241 setup_uniforms(phase);
2242 glDispatchCompute(x, y, z);
// Barrier covering texture fetches and texture updates after the dispatch.
2244 glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT);
2247 // Uniforms need to come after set_gl_state(), since they can be updated
2249 setup_uniforms(phase);
2251 // Bind the vertex data.
2252 GLuint vao = resource_pool->create_vec2_vao(phase->attribute_indexes, vbo);
2253 glBindVertexArray(vao);
// Three vertices, i.e. one triangle.
2255 glDrawArrays(GL_TRIANGLES, 0, 3);
2258 resource_pool->release_vec2_vao(vao);
// Let every effect undo whatever GL state it set in set_gl_state().
2261 for (unsigned i = 0; i < phase->effects.size(); ++i) {
2262 Node *node = phase->effects[i];
2263 node->effect->clear_gl_state();
2266 resource_pool->unuse_glsl_program(instance_program_num);
// NOTE(review): presumably guarded by a check that an FBO was actually
// created above (the guard line is not shown in this listing) -- confirm.
2269 resource_pool->release_fbo(fbo);
// Uploads every uniform registered on `phase` with the matching glUniform*
// call, one list per GLSL type. Uniforms whose location is -1 are skipped.
// For the array-capable types, uniform.num_values is passed as the element
// count.
2273 void EffectChain::setup_uniforms(Phase *phase)
2275 // TODO: Use UBO blocks.
// image2D uniforms are set as integers.
2276 for (size_t i = 0; i < phase->uniforms_image2d.size(); ++i) {
2277 const Uniform<int> &uniform = phase->uniforms_image2d[i];
2278 if (uniform.location != -1) {
2279 glUniform1iv(uniform.location, uniform.num_values, uniform.value);
// sampler2D uniforms are likewise set as integers.
2282 for (size_t i = 0; i < phase->uniforms_sampler2d.size(); ++i) {
2283 const Uniform<int> &uniform = phase->uniforms_sampler2d[i];
2284 if (uniform.location != -1) {
2285 glUniform1iv(uniform.location, uniform.num_values, uniform.value);
// bool uniforms: only single values are supported (asserted); the value is
// uploaded through the integer entry point.
2288 for (size_t i = 0; i < phase->uniforms_bool.size(); ++i) {
2289 const Uniform<bool> &uniform = phase->uniforms_bool[i];
2290 assert(uniform.num_values == 1);
2291 if (uniform.location != -1) {
2292 glUniform1i(uniform.location, *uniform.value);
2295 for (size_t i = 0; i < phase->uniforms_int.size(); ++i) {
2296 const Uniform<int> &uniform = phase->uniforms_int[i];
2297 if (uniform.location != -1) {
2298 glUniform1iv(uniform.location, uniform.num_values, uniform.value);
2301 for (size_t i = 0; i < phase->uniforms_ivec2.size(); ++i) {
2302 const Uniform<int> &uniform = phase->uniforms_ivec2[i];
2303 if (uniform.location != -1) {
2304 glUniform2iv(uniform.location, uniform.num_values, uniform.value);
2307 for (size_t i = 0; i < phase->uniforms_float.size(); ++i) {
2308 const Uniform<float> &uniform = phase->uniforms_float[i];
2309 if (uniform.location != -1) {
2310 glUniform1fv(uniform.location, uniform.num_values, uniform.value);
2313 for (size_t i = 0; i < phase->uniforms_vec2.size(); ++i) {
2314 const Uniform<float> &uniform = phase->uniforms_vec2[i];
2315 if (uniform.location != -1) {
2316 glUniform2fv(uniform.location, uniform.num_values, uniform.value);
2319 for (size_t i = 0; i < phase->uniforms_vec3.size(); ++i) {
2320 const Uniform<float> &uniform = phase->uniforms_vec3[i];
2321 if (uniform.location != -1) {
2322 glUniform3fv(uniform.location, uniform.num_values, uniform.value);
2325 for (size_t i = 0; i < phase->uniforms_vec4.size(); ++i) {
2326 const Uniform<float> &uniform = phase->uniforms_vec4[i];
2327 if (uniform.location != -1) {
2328 glUniform4fv(uniform.location, uniform.num_values, uniform.value);
// mat3 uniforms: only single matrices are supported (asserted).
2331 for (size_t i = 0; i < phase->uniforms_mat3.size(); ++i) {
2332 const Uniform<Matrix3d> &uniform = phase->uniforms_mat3[i];
2333 assert(uniform.num_values == 1);
2334 if (uniform.location != -1) {
2335 // Convert to float (GLSL has no double matrices).
// Element (row y, column x) is stored at index y + x * 3, i.e. column by
// column, which is the layout expected with transpose = GL_FALSE below.
2337 for (unsigned y = 0; y < 3; ++y) {
2338 for (unsigned x = 0; x < 3; ++x) {
2339 matrixf[y + x * 3] = (*uniform.value)(y, x);
2342 glUniformMatrix3fv(uniform.location, 1, GL_FALSE, matrixf);
// Sets the sampling parameters of the 2D texture bound on texture unit
// GL_TEXTURE0 + sampler_num: the minification filter, and clamp-to-edge
// wrapping in both S and T.
// NOTE(review): the branch lines between the two MIN_FILTER calls are not
// shown in this listing; presumably GL_LINEAR_MIPMAP_NEAREST is used when
// use_mipmaps is true and GL_LINEAR otherwise -- confirm.
2347 void EffectChain::setup_rtt_sampler(int sampler_num, bool use_mipmaps)
2349 glActiveTexture(GL_TEXTURE0 + sampler_num);
2352 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST);
2355 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
2358 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2360 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2364 } // namespace movit