git.sesse.net Git - ffmpeg/blob - libavfilter/opencl/deshake.cl

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with FFmpeg; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17  *
  18  * Copyright (C) 2000, Intel Corporation, all rights reserved.
  19  * Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  20  * Third party copyrights are property of their respective owners.
  21  *
  22  * Redistribution and use in source and binary forms, with or without modification,
  23  * are permitted provided that the following conditions are met:
  24  *
  25  *   * Redistribution's of source code must retain the above copyright notice,
  26  *     this list of conditions and the following disclaimer.
  27  *
  28  *   * Redistribution's in binary form must reproduce the above copyright notice,
  29  *     this list of conditions and the following disclaimer in the documentation
  30  *     and/or other materials provided with the distribution.
  31  *
  32  *   * The name of the copyright holders may not be used to endorse or promote products
  33  *     derived from this software without specific prior written permission.
  34  *
  35  * This software is provided by the copyright holders and contributors "as is" and
  36  * any express or implied warranties, including, but not limited to, the implied
  37  * warranties of merchantability and fitness for a particular purpose are disclaimed.
  38  * In no event shall the Intel Corporation or contributors be liable for any direct,
  39  * indirect, incidental, special, exemplary, or consequential damages
  40  * (including, but not limited to, procurement of substitute goods or services;
  41  * loss of use, data, or profits; or business interruption) however caused
  42  * and on any theory of liability, whether in contract, strict liability,
  43  * or tort (including negligence or otherwise) arising in any way out of
  44  * the use of this software, even if advised of the possibility of such damage.
  45  */
  46
  47 #define HARRIS_THRESHOLD 3.0f
  48 // Block size over which to compute harris response
  49 //
  50 // Note that changing this will require fiddling with the local array sizes in
  51 // harris_response
  52 #define HARRIS_RADIUS 2
  53 #define DISTANCE_THRESHOLD 80
  54
  55 // Sub-pixel refinement window for feature points
  56 #define REFINE_WIN_HALF_W 5
  57 #define REFINE_WIN_HALF_H 5
  58 #define REFINE_WIN_W 11 // REFINE_WIN_HALF_W * 2 + 1
  59 #define REFINE_WIN_H 11
  60
  61 // Non-maximum suppression window size
  62 #define NONMAX_WIN 30
  63 #define NONMAX_WIN_HALF 15 // NONMAX_WIN / 2
  64
  65 typedef struct PointPair {
  66     // Previous frame
  67     float2 p1;
  68     // Current frame
  69     float2 p2;
  70 } PointPair;
  71
  72 typedef struct SmoothedPointPair {
  73     // Non-smoothed point in current frame
  74     int2 p1;
  75     // Smoothed point in current frame
  76     float2 p2;
  77 } SmoothedPointPair;
  78
  79 typedef struct MotionVector {
  80     PointPair p;
  81     // Used to mark vectors as potential outliers
  82     int should_consider;
  83 } MotionVector;
  84
  85 const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
  86                           CLK_ADDRESS_CLAMP_TO_EDGE |
  87                           CLK_FILTER_NEAREST;
  88
  89 const sampler_t sampler_linear = CLK_NORMALIZED_COORDS_FALSE |
  90                           CLK_ADDRESS_CLAMP_TO_EDGE |
  91                           CLK_FILTER_LINEAR;
  92
  93 const sampler_t sampler_linear_mirror = CLK_NORMALIZED_COORDS_TRUE |
  94                           CLK_ADDRESS_MIRRORED_REPEAT |
  95                           CLK_FILTER_LINEAR;
  96
  97 // Writes to a 1D array at loc, treating it as a 2D array with the same
  98 // dimensions as the global work size.
  99 static void write_to_1d_arrf(__global float *buf, int2 loc, float val) {
 100     buf[loc.x + loc.y * get_global_size(0)] = val;
 101 }
 102
 103 static void write_to_1d_arrul8(__global ulong8 *buf, int2 loc, ulong8 val) {
 104     buf[loc.x + loc.y * get_global_size(0)] = val;
 105 }
 106
 107 static void write_to_1d_arrvec(__global MotionVector *buf, int2 loc, MotionVector val) {
 108     buf[loc.x + loc.y * get_global_size(0)] = val;
 109 }
 110
 111 static void write_to_1d_arrf2(__global float2 *buf, int2 loc, float2 val) {
 112     buf[loc.x + loc.y * get_global_size(0)] = val;
 113 }
 114
 115 static ulong8 read_from_1d_arrul8(__global const ulong8 *buf, int2 loc) {
 116     return buf[loc.x + loc.y * get_global_size(0)];
 117 }
 118
 119 static float2 read_from_1d_arrf2(__global const float2 *buf, int2 loc) {
 120     return buf[loc.x + loc.y * get_global_size(0)];
 121 }
 122
 123 // Returns the grayscale value at the given point.
 124 static float pixel_grayscale(__read_only image2d_t src, int2 loc) {
 125     float4 pixel = read_imagef(src, sampler, loc);
 126     return (pixel.x + pixel.y + pixel.z) / 3.0f;
 127 }
 128
 129 static float convolve(
 130     __local const float *grayscale,
 131     int local_idx_x,
 132     int local_idx_y,
 133     float mask[3][3]
 134 ) {
 135     float ret = 0;
 136
 137     // These loops touch each pixel surrounding loc as well as loc itself
 138     for (int i = 1, i2 = 0; i >= -1; --i, ++i2) {
 139         for (int j = -1, j2 = 0; j <= 1; ++j, ++j2) {
 140             ret += mask[i2][j2] * grayscale[(local_idx_x + 3 + j) + (local_idx_y + 3 + i) * 14];
 141         }
 142     }
 143
 144     return ret;
 145 }
 146
 147 // Sums dx * dy for all pixels within radius of loc
 148 static float sum_deriv_prod(
 149     __local const float *grayscale,
 150     float mask_x[3][3],
 151     float mask_y[3][3]
 152 ) {
 153     float ret = 0;
 154
 155     for (int i = HARRIS_RADIUS; i >= -HARRIS_RADIUS; --i) {
 156         for (int j = -HARRIS_RADIUS; j <= HARRIS_RADIUS; ++j) {
 157             ret += convolve(grayscale, get_local_id(0) + j, get_local_id(1) + i, mask_x) *
 158                    convolve(grayscale, get_local_id(0) + j, get_local_id(1) + i, mask_y);
 159         }
 160     }
 161
 162     return ret;
 163 }
 164
 165 // Sums d<>^2 (determined by mask) for all pixels within radius of loc
 166 static float sum_deriv_pow(__local const float *grayscale, float mask[3][3])
 167 {
 168     float ret = 0;
 169
 170     for (int i = HARRIS_RADIUS; i >= -HARRIS_RADIUS; --i) {
 171         for (int j = -HARRIS_RADIUS; j <= HARRIS_RADIUS; ++j) {
 172             float deriv = convolve(grayscale, get_local_id(0) + j, get_local_id(1) + i, mask);
 173             ret += deriv * deriv;
 174         }
 175     }
 176
 177     return ret;
 178 }
 179
 180 // Fills a box with the given radius and pixel around loc
 181 static void draw_box(__write_only image2d_t dst, int2 loc, float4 pixel, int radius)
 182 {
 183     for (int i = -radius; i <= radius; ++i) {
 184         for (int j = -radius; j <= radius; ++j) {
 185             write_imagef(
 186                 dst,
 187                 (int2)(
 188                     // Clamp to avoid writing outside image bounds
 189                     clamp(loc.x + i, 0, get_image_dim(dst).x - 1),
 190                     clamp(loc.y + j, 0, get_image_dim(dst).y - 1)
 191                 ),
 192                 pixel
 193             );
 194         }
 195     }
 196 }
 197
 198 // Converts the src image to grayscale
 199 __kernel void grayscale(
 200     __read_only image2d_t src,
 201     __write_only image2d_t grayscale
 202 ) {
 203     int2 loc = (int2)(get_global_id(0), get_global_id(1));
 204     write_imagef(grayscale, loc, (float4)(pixel_grayscale(src, loc), 0.0f, 0.0f, 1.0f));
 205 }
 206
 207 // This kernel computes the harris response for the given grayscale src image
 208 // within the given radius and writes it to harris_buf
 209 __kernel void harris_response(
 210     __read_only image2d_t grayscale,
 211     __global float *harris_buf
 212 ) {
 213     int2 loc = (int2)(get_global_id(0), get_global_id(1));
 214
 215     if (loc.x > get_image_width(grayscale) - 1 || loc.y > get_image_height(grayscale) - 1) {
 216         write_to_1d_arrf(harris_buf, loc, 0);
 217         return;
 218     }
 219
 220     float scale = 1.0f / ((1 << 2) * HARRIS_RADIUS * 255.0f);
 221
 222     float sobel_mask_x[3][3] = {
 223         {-1, 0, 1},
 224         {-2, 0, 2},
 225         {-1, 0, 1}
 226     };
 227
 228     float sobel_mask_y[3][3] = {
 229         { 1,  2,  1},
 230         { 0,  0,  0},
 231         {-1, -2, -1}
 232     };
 233
 234     // 8 x 8 local work + 3 pixels around each side (needed to accomodate for the
 235     // block size radius of 2)
 236     __local float grayscale_data[196];
 237
 238     int idx = get_group_id(0) * get_local_size(0);
 239     int idy = get_group_id(1) * get_local_size(1);
 240
 241     for (int i = idy - 3, it = 0; i < idy + (int)get_local_size(1) + 3; i++, it++) {
 242         for (int j = idx - 3, jt = 0; j < idx + (int)get_local_size(0) + 3; j++, jt++) {
 243             grayscale_data[jt + it * 14] = read_imagef(grayscale, sampler, (int2)(j, i)).x;
 244         }
 245     }
 246
 247     barrier(CLK_LOCAL_MEM_FENCE);
 248
 249     float sumdxdy = sum_deriv_prod(grayscale_data, sobel_mask_x, sobel_mask_y);
 250     float sumdx2 = sum_deriv_pow(grayscale_data, sobel_mask_x);
 251     float sumdy2 = sum_deriv_pow(grayscale_data, sobel_mask_y);
 252
 253     float trace = sumdx2 + sumdy2;
 254     // r = det(M) - k(trace(M))^2
 255     // k usually between 0.04 to 0.06
 256     float r = (sumdx2 * sumdy2 - sumdxdy * sumdxdy) - 0.04f * (trace * trace) * pown(scale, 4);
 257
 258     // Threshold the r value
 259     harris_buf[loc.x + loc.y * get_image_width(grayscale)] = r * step(HARRIS_THRESHOLD, r);
 260 }
 261
 262 // Gets a patch centered around a float coordinate from a grayscale image using
 263 // bilinear interpolation
 264 static void get_rect_sub_pix(
 265     __read_only image2d_t grayscale,
 266     float *buffer,
 267     int size_x,
 268     int size_y,
 269     float2 center
 270 ) {
 271     float2 offset = ((float2)(size_x, size_y) - 1.0f) * 0.5f;
 272
 273     for (int i = 0; i < size_y; i++) {
 274         for (int j = 0; j < size_x; j++) {
 275             buffer[i * size_x + j] = read_imagef(
 276                 grayscale,
 277                 sampler_linear,
 278                 (float2)(j, i) + center - offset
 279             ).x * 255.0f;
 280         }
 281     }
 282 }
 283
 284 // Refines detected features at a sub-pixel level
 285 //
 286 // This function is ported from OpenCV
 287 static float2 corner_sub_pix(
 288     __read_only image2d_t grayscale,
 289     float2 feature,
 290     float *mask
 291 ) {
 292     float2 init = feature;
 293     int src_width = get_global_size(0);
 294     int src_height = get_global_size(1);
 295
 296     const int max_iters = 40;
 297     const float eps = 0.001f * 0.001f;
 298     int i, j, k;
 299
 300     int iter = 0;
 301     float err = 0;
 302     float subpix[(REFINE_WIN_W + 2) * (REFINE_WIN_H + 2)];
 303     const float flt_epsilon = 0x1.0p-23f;
 304
 305     do {
 306         float2 feature_tmp;
 307         float a = 0, b = 0, c = 0, bb1 = 0, bb2 = 0;
 308
 309         get_rect_sub_pix(grayscale, subpix, REFINE_WIN_W + 2, REFINE_WIN_H + 2, feature);
 310         float *subpix_ptr = subpix;
 311         subpix_ptr += REFINE_WIN_W + 2 + 1;
 312
 313         // process gradient
 314         for (i = 0, k = 0; i < REFINE_WIN_H; i++, subpix_ptr += REFINE_WIN_W + 2) {
 315             float py = i - REFINE_WIN_HALF_H;
 316
 317             for (j = 0; j < REFINE_WIN_W; j++, k++) {
 318                 float m = mask[k];
 319                 float tgx = subpix_ptr[j + 1] - subpix_ptr[j - 1];
 320                 float tgy = subpix_ptr[j + REFINE_WIN_W + 2] - subpix_ptr[j - REFINE_WIN_W - 2];
 321                 float gxx = tgx * tgx * m;
 322                 float gxy = tgx * tgy * m;
 323                 float gyy = tgy * tgy * m;
 324                 float px = j - REFINE_WIN_HALF_W;
 325
 326                 a += gxx;
 327                 b += gxy;
 328                 c += gyy;
 329
 330                 bb1 += gxx * px + gxy * py;
 331                 bb2 += gxy * px + gyy * py;
 332             }
 333         }
 334
 335         float det = a * c - b * b;
 336         if (fabs(det) <= flt_epsilon * flt_epsilon) {
 337             break;
 338         }
 339
 340         // 2x2 matrix inversion
 341         float scale = 1.0f / det;
 342         feature_tmp.x = (float)(feature.x + (c * scale * bb1) - (b * scale * bb2));
 343         feature_tmp.y = (float)(feature.y - (b * scale * bb1) + (a * scale * bb2));
 344         err = dot(feature_tmp - feature, feature_tmp - feature);
 345
 346         feature = feature_tmp;
 347         if (feature.x < 0 || feature.x >= src_width || feature.y < 0 || feature.y >= src_height) {
 348             break;
 349         }
 350     } while (++iter < max_iters && err > eps);
 351
 352     // Make sure new point isn't too far from the initial point (indicates poor convergence)
 353     if (fabs(feature.x - init.x) > REFINE_WIN_HALF_W || fabs(feature.y - init.y) > REFINE_WIN_HALF_H) {
 354         feature = init;
 355     }
 356
 357     return feature;
 358 }
 359
 360 // Performs non-maximum suppression on the harris response and writes the resulting
 361 // feature locations to refined_features.
 362 //
 363 // Assumes that refined_features and the global work sizes are set up such that the image
 364 // is split up into a grid of 32x32 blocks where each block has a single slot in the
 365 // refined_features buffer. This kernel finds the best corner in each block (if the
 366 // block has any) and writes it to the corresponding slot in the buffer.
 367 //
 368 // If subpixel_refine is true, the features are additionally refined at a sub-pixel
 369 // level for increased precision.
 370 __kernel void refine_features(
 371     __read_only image2d_t grayscale,
 372     __global const float *harris_buf,
 373     __global float2 *refined_features,
 374     int subpixel_refine
 375 ) {
 376     int2 loc = (int2)(get_global_id(0), get_global_id(1));
 377     // The location in the grayscale buffer rather than the compacted grid
 378     int2 loc_i = (int2)(loc.x * 32, loc.y * 32);
 379
 380     float new_val;
 381     float max_val = 0;
 382     float2 loc_max = (float2)(-1, -1);
 383
 384     int end_x = min(loc_i.x + 32, (int)get_image_dim(grayscale).x - 1);
 385     int end_y = min(loc_i.y + 32, (int)get_image_dim(grayscale).y - 1);
 386
 387     for (int i = loc_i.x; i < end_x; ++i) {
 388         for (int j = loc_i.y; j < end_y; ++j) {
 389             new_val = harris_buf[i + j * get_image_dim(grayscale).x];
 390
 391             if (new_val > max_val) {
 392                 max_val = new_val;
 393                 loc_max = (float2)(i, j);
 394             }
 395         }
 396     }
 397
 398     if (max_val == 0) {
 399         // There are no features in this part of the frame
 400         write_to_1d_arrf2(refined_features, loc, loc_max);
 401         return;
 402     }
 403
 404     if (subpixel_refine) {
 405         float mask[REFINE_WIN_H * REFINE_WIN_W];
 406         for (int i = 0; i < REFINE_WIN_H; i++) {
 407             float y = (float)(i - REFINE_WIN_HALF_H) / REFINE_WIN_HALF_H;
 408             float vy = exp(-y * y);
 409
 410             for (int j = 0; j < REFINE_WIN_W; j++) {
 411                 float x = (float)(j - REFINE_WIN_HALF_W) / REFINE_WIN_HALF_W;
 412                 mask[i * REFINE_WIN_W + j] = (float)(vy * exp(-x * x));
 413             }
 414         }
 415
 416         loc_max = corner_sub_pix(grayscale, loc_max, mask);
 417     }
 418
 419     write_to_1d_arrf2(refined_features, loc, loc_max);
 420 }
 421
 422 // Extracts BRIEF descriptors from the grayscale src image for the given features
 423 // using the provided sampler.
 424 __kernel void brief_descriptors(
 425     __read_only image2d_t grayscale,
 426     __global const float2 *refined_features,
 427     // for 512 bit descriptors
 428     __global ulong8 *desc_buf,
 429     __global const PointPair *brief_pattern
 430 ) {
 431     int2 loc = (int2)(get_global_id(0), get_global_id(1));
 432     float2 feature = read_from_1d_arrf2(refined_features, loc);
 433
 434     // There was no feature in this part of the frame
 435     if (feature.x == -1) {
 436         write_to_1d_arrul8(desc_buf, loc, (ulong8)(0));
 437         return;
 438     }
 439
 440     ulong8 desc = 0;
 441     ulong *p = &desc;
 442
 443     for (int i = 0; i < 8; ++i) {
 444         for (int j = 0; j < 64; ++j) {
 445             PointPair pair = brief_pattern[j * (i + 1)];
 446             float l1 = read_imagef(grayscale, sampler_linear, feature + pair.p1).x;
 447             float l2 = read_imagef(grayscale, sampler_linear, feature + pair.p2).x;
 448
 449             if (l1 < l2) {
 450                 p[i] |= 1UL << j;
 451             }
 452         }
 453     }
 454
 455     write_to_1d_arrul8(desc_buf, loc, desc);
 456 }
 457
 458 // Given buffers with descriptors for the current and previous frame, determines
 459 // which ones match, writing correspondences to matches_buf.
 460 //
 461 // Feature and descriptor buffers are assumed to be compacted (each element sourced
 462 // from a 32x32 block in the frame being processed).
 463 __kernel void match_descriptors(
 464     __global const float2 *prev_refined_features,
 465     __global const float2 *refined_features,
 466     __global const ulong8 *desc_buf,
 467     __global const ulong8 *prev_desc_buf,
 468     __global MotionVector *matches_buf
 469 ) {
 470     int2 loc = (int2)(get_global_id(0), get_global_id(1));
 471     ulong8 desc = read_from_1d_arrul8(desc_buf, loc);
 472     const int search_radius = 3;
 473
 474     MotionVector invalid_vector = (MotionVector) {
 475         (PointPair) {
 476             (float2)(-1, -1),
 477             (float2)(-1, -1)
 478         },
 479         0
 480     };
 481
 482     if (desc.s0 == 0 && desc.s1 == 0) {
 483         // There was no feature in this part of the frame
 484         write_to_1d_arrvec(
 485             matches_buf,
 486             loc,
 487             invalid_vector
 488         );
 489         return;
 490     }
 491
 492     int2 start = max(loc - search_radius, 0);
 493     int2 end = min(loc + search_radius, (int2)(get_global_size(0) - 1, get_global_size(1) - 1));
 494
 495     for (int i = start.x; i < end.x; ++i) {
 496         for (int j = start.y; j < end.y; ++j) {
 497             int2 prev_point = (int2)(i, j);
 498             int total_dist = 0;
 499
 500             ulong8 prev_desc = read_from_1d_arrul8(prev_desc_buf, prev_point);
 501
 502             if (prev_desc.s0 == 0 && prev_desc.s1 == 0) {
 503                 continue;
 504             }
 505
 506             ulong *prev_desc_p = &prev_desc;
 507             ulong *desc_p = &desc;
 508
 509             for (int i = 0; i < 8; i++) {
 510                 total_dist += popcount(desc_p[i] ^ prev_desc_p[i]);
 511             }
 512
 513             if (total_dist < DISTANCE_THRESHOLD) {
 514                 write_to_1d_arrvec(
 515                     matches_buf,
 516                     loc,
 517                     (MotionVector) {
 518                         (PointPair) {
 519                             read_from_1d_arrf2(prev_refined_features, prev_point),
 520                             read_from_1d_arrf2(refined_features, loc)
 521                         },
 522                         1
 523                     }
 524                 );
 525
 526                 return;
 527             }
 528         }
 529     }
 530
 531     // There is no found match for this point
 532     write_to_1d_arrvec(
 533         matches_buf,
 534         loc,
 535         invalid_vector
 536     );
 537 }
 538
 539 // Returns the position of the given point after the transform is applied
 540 static float2 transformed_point(float2 p, __global const float *transform) {
 541     float2 ret;
 542
 543     ret.x = p.x * transform[0] + p.y * transform[1] + transform[2];
 544     ret.y = p.x * transform[3] + p.y * transform[4] + transform[5];
 545
 546     return ret;
 547 }
 548
 549
 550 // Performs the given transform on the src image
 551 __kernel void transform(
 552     __read_only image2d_t src,
 553     __write_only image2d_t dst,
 554     __global const float *transform
 555 ) {
 556     int2 loc = (int2)(get_global_id(0), get_global_id(1));
 557     float2 norm = convert_float2(get_image_dim(src));
 558
 559     write_imagef(
 560         dst,
 561         loc,
 562         read_imagef(
 563             src,
 564             sampler_linear_mirror,
 565             transformed_point((float2)(loc.x, loc.y), transform) / norm
 566         )
 567     );
 568 }
 569
 570 // Returns the new location of the given point using the given crop bounding box
 571 // and the width and height of the original frame.
 572 static float2 cropped_point(
 573     float2 p,
 574     float2 top_left,
 575     float2 bottom_right,
 576     int2 orig_dim
 577 ) {
 578     float2 ret;
 579
 580     float crop_width  = bottom_right.x - top_left.x;
 581     float crop_height = bottom_right.y - top_left.y;
 582
 583     float width_norm = p.x / (float)orig_dim.x;
 584     float height_norm = p.y / (float)orig_dim.y;
 585
 586     ret.x = (width_norm * crop_width) + top_left.x;
 587     ret.y = (height_norm * crop_height) + ((float)orig_dim.y - bottom_right.y);
 588
 589     return ret;
 590 }
 591
 592 // Upscales the given cropped region to the size of the original frame
 593 __kernel void crop_upscale(
 594     __read_only image2d_t src,
 595     __write_only image2d_t dst,
 596     float2 top_left,
 597     float2 bottom_right
 598 ) {
 599     int2 loc = (int2)(get_global_id(0), get_global_id(1));
 600
 601     write_imagef(
 602         dst,
 603         loc,
 604         read_imagef(
 605             src,
 606             sampler_linear,
 607             cropped_point((float2)(loc.x, loc.y), top_left, bottom_right, get_image_dim(dst))
 608         )
 609     );
 610 }
 611
 612 // Draws boxes to represent the given point matches and uses the given transform
 613 // and crop info to make sure their positions are accurate on the transformed frame.
 614 //
 615 // model_matches is an array of three points that were used by the RANSAC process
 616 // to generate the given transform
 617 __kernel void draw_debug_info(
 618     __write_only image2d_t dst,
 619     __global const MotionVector *matches,
 620     __global const MotionVector *model_matches,
 621     int num_model_matches,
 622     __global const float *transform
 623 ) {
 624     int loc = get_global_id(0);
 625     MotionVector vec = matches[loc];
 626     // Black box: matched point that RANSAC considered an outlier
 627     float4 big_rect_color = (float4)(0.1f, 0.1f, 0.1f, 1.0f);
 628
 629     if (vec.should_consider) {
 630         // Green box: matched point that RANSAC considered an inlier
 631         big_rect_color = (float4)(0.0f, 1.0f, 0.0f, 1.0f);
 632     }
 633
 634     for (int i = 0; i < num_model_matches; i++) {
 635         if (vec.p.p2.x == model_matches[i].p.p2.x && vec.p.p2.y == model_matches[i].p.p2.y) {
 636             // Orange box: point used to calculate model
 637             big_rect_color = (float4)(1.0f, 0.5f, 0.0f, 1.0f);
 638         }
 639     }
 640
 641     float2 transformed_p1 = transformed_point(vec.p.p1, transform);
 642     float2 transformed_p2 = transformed_point(vec.p.p2, transform);
 643
 644     draw_box(dst, (int2)(transformed_p2.x, transformed_p2.y), big_rect_color, 5);
 645     // Small light blue box: the point in the previous frame
 646     draw_box(dst, (int2)(transformed_p1.x, transformed_p1.y), (float4)(0.0f, 0.3f, 0.7f, 1.0f), 3);
 647 }