+
+ // Since the GPU gives us bilinear sampling for free, we can get two
+ // samples for the price of one (for every sample except the center one,
+ // in which case this trick doesn't buy us anything). Simply sample
+ // between the two pixel centers, and we can do with fewer weights.
+ // (This is right even in the vertical pass where we don't actually
+ // sample between the pixels, because we have linear interpolation
+ // there too.)
+ //
+ // We pack the parameters as two consecutive floats per sample: the
+ // relative sample coordinate first, followed by the weight.
+ float samples[2 * (NUM_TAPS / 2 + 1)];
+
+ // Center sample.
+ samples[2 * 0 + 0] = 0.0f;
+ samples[2 * 0 + 1] = weight[0];
+
+ // All other samples.
+ for (unsigned i = 1; i < NUM_TAPS / 2 + 1; ++i) {
+ unsigned base_pos = i * 2 - 1;
+ float w1 = weight[base_pos];
+ float w2 = weight[base_pos + 1];
+
+ float offset, total_weight;
+ combine_two_samples(w1, w2, &offset, &total_weight, NULL);
+
+ if (direction == HORIZONTAL) {
+ samples[2 * i + 0] = (base_pos + offset) / (float)width;
+ } else if (direction == VERTICAL) {
+ samples[2 * i + 0] = (base_pos + offset) / (float)height;
+ } else {
+ assert(false);
+ }
+
+ samples[2 * i + 1] = total_weight;