git.sesse.net Git - movit/blob - deinterlace_effect.frag

   1 // Implicit uniforms:
   2 // uniform int PREFIX(current_field_position);
   3 // uniform float PREFIX(num_lines);
   4 // uniform float PREFIX(self_offset);
   5 // uniform float PREFIX(inv_width);
   6 // uniform float PREFIX(current_offset)[2];
   7 // uniform float PREFIX(other_offset)[3];
   8
   9 // The best explanation of YADIF that I've seen is actually a pseudocode
  10 // reimplementation from the Doom9 forum:
  11 //
  12 //   http://forum.doom9.org/showthread.php?p=980375#post980375
  13 //
  14 // We generally follow its terminology instead of the original C source
  15 // (which I'll refer to as “C YADIF”), although I've used the C source as a
  16 // reference to double-check at times. We're not bit-exact the same as
  17 // C YADIF; in particular, we work in linear light, and left/right edge
  18 // handling might also be a bit different (for top/bottom edge handling,
  19 // C YADIF repeats texels like we do). Also, C YADIF generally works on
  20 // Y', Cb and Cr planes separately, while we work on the entire RGBA pixel
  21 // and do our spatial interpolation decisions based on the pixel as a whole,
  22 // so our decision metric also naturally becomes different.
  23
  24 #define DIFF(s1, s2) dot((s1) - (s2), (s1) - (s2))
  25
  26 vec4 FUNCNAME(vec2 tc) {
  27         int yi = int(round(tc.y * PREFIX(num_lines) - 0.5f));
  28
  29         // Figure out if we just want to keep the current line or if
  30         // we need to interpolate. This branch is obviously divergent,
  31         // but the very nature of deinterlacing would seem to require that.
  32         //
  33         // Note that since we have bottom-left origin, yi % 2 will return 0
  34         // for bottom and 1 for top.
  35         if ((yi % 2) != PREFIX(current_field_position)) {
  36                 return INPUT3(vec2(tc.x, tc.y + PREFIX(self_offset)));
  37         }
  38
  39         // First, estimate the current pixel from the neighboring pixels in the
  40         // same field (spatial interpolation). We try first 0 degrees (straight
  41         // up/down), then ±45 degrees and then finally ±63 degrees. The best of
  42         // these, as determined by the “spatial score” (basically sum of squared
  43         // differences in three neighboring pixels), is kept.
  44         //
  45         // The C version of YADIF goesn't check +63° unless +45° gave an improvement,
  46         // and similarly not -63° unless -45° did. The MMX version goes through pains
  47         // to simulate the same, but notes that it “hurts both quality and speed”.
  48         // We're not bit-exact the same as the C version anyway, and not sampling
  49         // ±63° would probably be a rather divergent branch, so we just always do it.
  50
  51         // a b c d e f g     ↑ y
  52         //       x           |
  53         // h i j k l m n     +--> x
  54
  55         vec2 a_pos = vec2(tc.x - 3.0 * PREFIX(inv_width), tc.y + PREFIX(current_offset)[1]);
  56         vec2 b_pos = vec2(tc.x - 2.0 * PREFIX(inv_width), a_pos.y);
  57         vec2 c_pos = vec2(tc.x -       PREFIX(inv_width), a_pos.y);
  58         vec2 d_pos = vec2(tc.x,                           a_pos.y);
  59         vec2 e_pos = vec2(tc.x +       PREFIX(inv_width), a_pos.y);
  60         vec2 f_pos = vec2(tc.x + 2.0 * PREFIX(inv_width), a_pos.y);
  61         vec2 g_pos = vec2(tc.x + 3.0 * PREFIX(inv_width), a_pos.y);
  62
  63         vec2 h_pos = vec2(tc.x - 3.0 * PREFIX(inv_width), tc.y + PREFIX(current_offset)[0]);
  64         vec2 i_pos = vec2(tc.x - 2.0 * PREFIX(inv_width), h_pos.y);
  65         vec2 j_pos = vec2(tc.x -       PREFIX(inv_width), h_pos.y);
  66         vec2 k_pos = vec2(tc.x,                           h_pos.y);
  67         vec2 l_pos = vec2(tc.x +       PREFIX(inv_width), h_pos.y);
  68         vec2 m_pos = vec2(tc.x + 2.0 * PREFIX(inv_width), h_pos.y);
  69         vec2 n_pos = vec2(tc.x + 3.0 * PREFIX(inv_width), h_pos.y);
  70
  71         vec4 a = INPUT3(a_pos);
  72         vec4 b = INPUT3(b_pos);
  73         vec4 c = INPUT3(c_pos);
  74         vec4 d = INPUT3(d_pos);
  75         vec4 e = INPUT3(e_pos);
  76         vec4 f = INPUT3(f_pos);
  77         vec4 g = INPUT3(g_pos);
  78         vec4 h = INPUT3(h_pos);
  79         vec4 i = INPUT3(i_pos);
  80         vec4 j = INPUT3(j_pos);
  81         vec4 k = INPUT3(k_pos);
  82         vec4 l = INPUT3(l_pos);
  83         vec4 m = INPUT3(m_pos);
  84         vec4 n = INPUT3(n_pos);
  85
  86         // 0 degrees. Note that pred is actually twice the real spatial prediction;
  87         // we halve it later to same some arithmetic. Also, our spatial score is not
  88         // the same as in C YADIF; we use the total squared sum over all four
  89         // channels instead of deinterlacing each channel separately.
  90         //
  91         // Note that there's a small, arbitrary bonus for this first alternative,
  92         // so that vertical interpolation wins if everything else is equal.
  93         vec4 pred = d + k;
  94         float score;
  95         float best_score = DIFF(c, j) + DIFF(d, k) + DIFF(e, l) - 1e-4;
  96
  97         // -45 degrees.
  98         score = DIFF(b, k) + DIFF(c, l) + DIFF(d, m);
  99         if (score < best_score) {
 100                 pred = c + l;
 101                 best_score = score;
 102         }
 103
 104         // -63 degrees.
 105         score = DIFF(a, l) + DIFF(b, m) + DIFF(c, n);
 106         if (score < best_score) {
 107                 pred = b + m;
 108                 best_score = score;
 109         }
 110
 111         // +45 degrees.
 112         score = DIFF(d, i) + DIFF(e, j) + DIFF(f, k);
 113         if (score < best_score) {
 114                 pred = e + j;
 115                 best_score = score;
 116         }
 117
 118         // +63 degrees.
 119         score = DIFF(e, h) + DIFF(f, i) + DIFF(g, j);
 120         if (score < best_score) {
 121                 pred = f + i;
 122                 // best_score isn't used anymore.
 123         }
 124
 125         pred *= 0.5f;
 126
 127         // Now we do a temporal prediction (p2) of this pixel based on the previous
 128         // and next fields. The spatial prediction is clamped so that it is not
 129         // too far from this temporal prediction, where “too far” is based on
 130         // the amount of local temporal change. (In other words, the temporal prediction
 131         // is the safe choice, and the question is how far away from that we'll let
 132         // our spatial choice run.) Note that here, our difference metric
 133         // _is_ the same as C YADIF, namely per-channel abs.
 134         //
 135         // The sample positions look like this; in order to avoid variable name conflicts
 136         // with the spatial interpolation, we use uppercase names. x is, again,
 137         // the current pixel we're trying to estimate.
 138         //
 139         //     C   H      ↑ y
 140         //   A   F   K    |
 141         //     D x I      |
 142         //   B   G   L    |
 143         //     E   J      +-----> time
 144         //
 145         vec2 AFK_pos = d_pos;
 146         vec2 BGL_pos = k_pos;
 147         vec4 A = INPUT1(AFK_pos);
 148         vec4 B = INPUT1(BGL_pos);
 149         vec4 F = d;
 150         vec4 G = k;
 151         vec4 K = INPUT5(AFK_pos);
 152         vec4 L = INPUT5(BGL_pos);
 153
 154         vec2 CH_pos = vec2(tc.x, tc.y + PREFIX(other_offset)[2]);
 155         vec2 DI_pos = vec2(tc.x, tc.y + PREFIX(other_offset)[1]);
 156         vec2 EJ_pos = vec2(tc.x, tc.y + PREFIX(other_offset)[0]);
 157
 158         vec4 C = INPUT2(CH_pos);
 159         vec4 D = INPUT2(DI_pos);
 160         vec4 E = INPUT2(EJ_pos);
 161
 162         vec4 H = INPUT4(CH_pos);
 163         vec4 I = INPUT4(DI_pos);
 164         vec4 J = INPUT4(EJ_pos);
 165
 166         // Find temporal differences around this line, using all five fields.
 167         // tdiff0 is around the current field, tdiff1 is around the previous one,
 168         // tdiff2 is around the next one.
 169         vec4 tdiff0 = abs(D - I);
 170         vec4 tdiff1 = abs(A - F) + abs(B - G);  // Actually twice tdiff1.
 171         vec4 tdiff2 = abs(K - F) + abs(L - G);  // Actually twice tdiff2.
 172         vec4 diff = max(tdiff0, 0.5f * max(tdiff1, tdiff2));
 173
 174         // The following part is the spatial interlacing check, which loosens up the
 175         // allowable temporal change. (See also the comments in the .h file.)
 176         // It costs us four extra loads (C, E, H, J) and a few extra ALU ops;
 177         // we're already very load-heavy, so the extra ALU is effectively free.
 178         // It costs about 18% performance in some benchmarks, which squares
 179         // well with going from 20 to 24 loads (a 20% increase), although for
 180         // total overall performance in longer chains, the difference is nearly zero.
 181         //
 182         // The basic idea is seemingly to allow more change if there are large spatial
 183         // vertical changes, even if there are few temporal changes. These differences
 184         // are signed, though, which make it more tricky to follow, although they seem
 185         // to reduce into some sort of pseudo-abs. I will not claim to understand them
 186         // very well.
 187         //
 188         // We start by temporally interpolating the current vertical line (p0–p4):
 189         //
 190         //     C p0 H      ↑ y
 191         //   A   p1   K    |
 192         //     D p2 I      |
 193         //   B   p3   L    |
 194         //     E p4 J      +-----> time
 195         //
 196         // YADIF_ENABLE_SPATIAL_INTERLACING_CHECK will be #defined to 1
 197         // if the check is enabled. Otherwise, the compiler should
 198         // be able to remove the dependent code quite easily.
 199         vec4 p0 = 0.5f * (C + H);
 200         vec4 p1 = F;
 201         vec4 p2 = 0.5f * (D + I);
 202         vec4 p3 = G;
 203         vec4 p4 = 0.5f * (E + J);
 204
 205 #if YADIF_ENABLE_SPATIAL_INTERLACING_CHECK
 206         vec4 max_ = max(max(p2 - p3, p2 - p1), min(p0 - p1, p4 - p3));
 207         vec4 min_ = min(min(p2 - p3, p2 - p1), max(p0 - p1, p4 - p3));
 208         diff = max(diff, max(min_, -max_));
 209 #endif
 210
 211         return clamp(pred, p2 - diff, p2 + diff);
 212 }
 213
 214 #undef DIFF
 215 #undef YADIF_ENABLE_SPATIAL_INTERLACING_CHECK