]> git.sesse.net Git - pistorm/blob - raylib_pi4_test/external/stb_image_resize.h
Update raylib files and Makefile for Pi 4 testing
[pistorm] / raylib_pi4_test / external / stb_image_resize.h
1 /* stb_image_resize - v0.97 - public domain image resizing
2    by Jorge L Rodriguez (@VinoBS) - 2014
3    http://github.com/nothings/stb
4
5    Written with emphasis on usability, portability, and efficiency. (No
6    SIMD or threads, so it can be easily outperformed by libs that use those.)
7    Only scaling and translation is supported, no rotations or shears.
8    Easy API downsamples w/Mitchell filter, upsamples w/cubic interpolation.
9
10    COMPILING & LINKING
11       In one C/C++ file that #includes this file, do this:
12          #define STB_IMAGE_RESIZE_IMPLEMENTATION
13       before the #include. That will create the implementation in that file.
14
15    QUICKSTART
16       stbir_resize_uint8(      input_pixels , in_w , in_h , 0,
17                                output_pixels, out_w, out_h, 0, num_channels)
18       stbir_resize_float(...)
19       stbir_resize_uint8_srgb( input_pixels , in_w , in_h , 0,
20                                output_pixels, out_w, out_h, 0,
21                                num_channels , alpha_chan  , 0)
22       stbir_resize_uint8_srgb_edgemode(
23                                input_pixels , in_w , in_h , 0,
24                                output_pixels, out_w, out_h, 0,
25                                num_channels , alpha_chan  , 0, STBIR_EDGE_CLAMP)
26                                                             // WRAP/REFLECT/ZERO
27
28    FULL API
29       See the "header file" section of the source for API documentation.
30
31    ADDITIONAL DOCUMENTATION
32
33       SRGB & FLOATING POINT REPRESENTATION
34          The sRGB functions presume IEEE floating point. If you do not have
35          IEEE floating point, define STBIR_NON_IEEE_FLOAT. This will use
36          a slower implementation.
37
38       MEMORY ALLOCATION
39          The resize functions here perform a single memory allocation using
40          malloc. To control the memory allocation, before the #include that
41          triggers the implementation, do:
42
43             #define STBIR_MALLOC(size,context) ...
44             #define STBIR_FREE(ptr,context)   ...
45
46          Each resize function makes exactly one call to malloc/free, so to use
47          temp memory, store the temp memory in the context and return that.
48
49       ASSERT
50          Define STBIR_ASSERT(boolval) to override assert() and not use assert.h
51
52       OPTIMIZATION
53          Define STBIR_SATURATE_INT to compute clamp values in-range using
54          integer operations instead of float operations. This may be faster
55          on some platforms.
56
57       DEFAULT FILTERS
58          For functions which don't provide explicit control over what filters
59          to use, you can change the compile-time defaults with
60
61             #define STBIR_DEFAULT_FILTER_UPSAMPLE     STBIR_FILTER_something
62             #define STBIR_DEFAULT_FILTER_DOWNSAMPLE   STBIR_FILTER_something
63
64          See stbir_filter in the header-file section for the list of filters.
65
66       NEW FILTERS
67          A number of 1D filter kernels are used. For a list of
68          supported filters see the stbir_filter enum. To add a new filter,
69          write a filter function and add it to stbir__filter_info_table.
70
71       PROGRESS
72          For interactive use with slow resize operations, you can install
73          a progress-report callback:
74
75             #define STBIR_PROGRESS_REPORT(val)   some_func(val)
76
77          The parameter val is a float which goes from 0 to 1 as progress is made.
78
79          For example:
80
81             static void my_progress_report(float progress);
82             #define STBIR_PROGRESS_REPORT(val) my_progress_report(val)
83
84             #define STB_IMAGE_RESIZE_IMPLEMENTATION
85             #include "stb_image_resize.h"
86
87             static void my_progress_report(float progress)
88             {
89                printf("Progress: %f%%\n", progress*100);
90             }
91
92       MAX CHANNELS
93          If your image has more than 64 channels, define STBIR_MAX_CHANNELS
94          to the max you'll have.
95
96       ALPHA CHANNEL
97          Most of the resizing functions provide the ability to control how
98          the alpha channel of an image is processed. The important things
99          to know about this:
100
101          1. The best mathematically-behaved version of alpha to use is
102          called "premultiplied alpha", in which the other color channels
103          have had the alpha value multiplied in. If you use premultiplied
104          alpha, linear filtering (such as image resampling done by this
105          library, or performed in texture units on GPUs) does the "right
106          thing". While premultiplied alpha is standard in the movie CGI
107          industry, it is still uncommon in the videogame/real-time world.
108
109          If you linearly filter non-premultiplied alpha, strange effects
110          occur. (For example, the 50/50 average of 99% transparent bright green
111          and 1% transparent black produces 50% transparent dark green when
112          non-premultiplied, whereas premultiplied it produces 50%
113          transparent near-black. The former introduces green energy
114          that doesn't exist in the source image.)
115
116          2. Artists should not edit premultiplied-alpha images; artists
117          want non-premultiplied alpha images. Thus, art tools generally output
118          non-premultiplied alpha images.
119
120          3. You will get best results in most cases by converting images
121          to premultiplied alpha before processing them mathematically.
122
123          4. If you pass the flag STBIR_FLAG_ALPHA_PREMULTIPLIED, the
124          resizer does not do anything special for the alpha channel;
125          it is resampled identically to other channels. This produces
126          the correct results for premultiplied-alpha images, but produces
127          less-than-ideal results for non-premultiplied-alpha images.
128
129          5. If you do not pass the flag STBIR_FLAG_ALPHA_PREMULTIPLIED,
130          then the resizer weights the contribution of input pixels
131          based on their alpha values, or, equivalently, it multiplies
132          the alpha value into the color channels, resamples, then divides
133          by the resultant alpha value. Input pixels which have alpha=0 do
134          not contribute at all to output pixels unless _all_ of the input
135          pixels affecting that output pixel have alpha=0, in which case
136          the result for that pixel is the same as it would be with
137          STBIR_FLAG_ALPHA_PREMULTIPLIED. However, this is only true for
138          input images in integer formats. For input images in float format,
139          input pixels with alpha=0 have no effect, and output pixels
140          which have alpha=0 will be 0 in all channels. (For float images,
141          you can manually achieve the same result by adding a tiny epsilon
142          value to the alpha channel of every image, and then subtracting
143          or clamping it at the end.)
144
145          6. You can suppress the behavior described in #5 and make
146          all-0-alpha pixels have 0 in all channels by #defining
147          STBIR_NO_ALPHA_EPSILON.
148
149          7. You can separately control whether the alpha channel is
150          interpreted as linear or affected by the colorspace. By default
151          it is linear; you almost never want to apply the colorspace.
152          (For example, graphics hardware does not apply sRGB conversion
153          to the alpha channel.)
154
155    CONTRIBUTORS
156       Jorge L Rodriguez: Implementation
157       Sean Barrett: API design, optimizations
158       Aras Pranckevicius: bugfix
159       Nathan Reed: warning fixes
160
161    REVISIONS
162       0.97 (2020-02-02) fixed warning
163       0.96 (2019-03-04) fixed warnings
164       0.95 (2017-07-23) fixed warnings
165       0.94 (2017-03-18) fixed warnings
166       0.93 (2017-03-03) fixed bug with certain combinations of heights
167       0.92 (2017-01-02) fix integer overflow on large (>2GB) images
168       0.91 (2016-04-02) fix warnings; fix handling of subpixel regions
169       0.90 (2014-09-17) first released version
170
171    LICENSE
172      See end of file for license information.
173
174    TODO
175       Don't decode all of the image data when only processing a partial tile
176       Don't use full-width decode buffers when only processing a partial tile
177       When processing wide images, break processing into tiles so data fits in L1 cache
178       Installable filters?
179       Resize that respects alpha test coverage
180          (Reference code: FloatImage::alphaTestCoverage and FloatImage::scaleAlphaToCoverage:
181          https://code.google.com/p/nvidia-texture-tools/source/browse/trunk/src/nvimage/FloatImage.cpp )
182 */
183
184 #ifndef STBIR_INCLUDE_STB_IMAGE_RESIZE_H
185 #define STBIR_INCLUDE_STB_IMAGE_RESIZE_H
186
187 #ifdef _MSC_VER
188 typedef unsigned char  stbir_uint8;
189 typedef unsigned short stbir_uint16;
190 typedef unsigned int   stbir_uint32;
191 #else
192 #include <stdint.h>
193 typedef uint8_t  stbir_uint8;
194 typedef uint16_t stbir_uint16;
195 typedef uint32_t stbir_uint32;
196 #endif
197
198 #ifndef STBIRDEF
199 #ifdef STB_IMAGE_RESIZE_STATIC
200 #define STBIRDEF static
201 #else
202 #ifdef __cplusplus
203 #define STBIRDEF extern "C"
204 #else
205 #define STBIRDEF extern
206 #endif
207 #endif
208 #endif
209
210 //////////////////////////////////////////////////////////////////////////////
211 //
212 // Easy-to-use API:
213 //
214 //     * "input pixels" points to an array of image data with 'num_channels' channels (e.g. RGB=3, RGBA=4)
215 //     * input_w is input image width (x-axis), input_h is input image height (y-axis)
216 //     * stride is the offset between successive rows of image data in memory, in bytes. you can
217 //       specify 0 to mean packed continuously in memory
218 //     * alpha channel is treated identically to other channels.
219 //     * colorspace is linear or sRGB as specified by function name
220 //     * returned result is 1 for success or 0 in case of an error.
221 //       #define STBIR_ASSERT() to trigger an assert on parameter validation errors.
222 //     * Memory required grows approximately linearly with input and output size, but with
223 //       discontinuities at input_w == output_w and input_h == output_h.
224 //     * These functions use a "default" resampling filter defined at compile time. To change the filter,
225 //       you can change the compile-time defaults by #defining STBIR_DEFAULT_FILTER_UPSAMPLE
226 //       and STBIR_DEFAULT_FILTER_DOWNSAMPLE, or you can use the medium-complexity API.
227
228 STBIRDEF int stbir_resize_uint8(     const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
229                                            unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
230                                      int num_channels);
231
232 STBIRDEF int stbir_resize_float(     const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
233                                            float *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
234                                      int num_channels);
235
236
237 // The following functions interpret image data as gamma-corrected sRGB.
238 // Specify STBIR_ALPHA_CHANNEL_NONE if you have no alpha channel,
239 // or otherwise provide the index of the alpha channel. Flags value
240 // of 0 will probably do the right thing if you're not sure what
241 // the flags mean.
242
243 #define STBIR_ALPHA_CHANNEL_NONE       -1
244
245 // Set this flag if your texture has premultiplied alpha. Otherwise, stbir will
246 // use alpha-weighted resampling (effectively premultiplying, resampling,
247 // then unpremultiplying).
248 #define STBIR_FLAG_ALPHA_PREMULTIPLIED    (1 << 0)
249 // The specified alpha channel should be handled as gamma-corrected value even
250 // when doing sRGB operations.
251 #define STBIR_FLAG_ALPHA_USES_COLORSPACE  (1 << 1)
252
253 STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
254                                            unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
255                                      int num_channels, int alpha_channel, int flags);
256
257
// Selects how requests to sample off the edge of the image are handled.
// Enumerators are numbered from 1.
typedef enum
{
    STBIR_EDGE_CLAMP   = 1,
    STBIR_EDGE_REFLECT = 2,
    STBIR_EDGE_WRAP    = 3,
    STBIR_EDGE_ZERO    = 4   // last entry carries no trailing comma (C89 / C++98 -pedantic)
} stbir_edge;
265
266 // This function adds the ability to specify how requests to sample off the edge of the image are handled.
267 STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
268                                                     unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
269                                               int num_channels, int alpha_channel, int flags,
270                                               stbir_edge edge_wrap_mode);
271
272 //////////////////////////////////////////////////////////////////////////////
273 //
274 // Medium-complexity API
275 //
276 // This extends the easy-to-use API as follows:
277 //
278 //     * Alpha-channel can be processed separately
279 //       * If alpha_channel is not STBIR_ALPHA_CHANNEL_NONE
280 //         * Alpha channel will not be gamma corrected (unless flags&STBIR_FLAG_ALPHA_USES_COLORSPACE)
281 //         * Filters will be weighted by alpha channel (unless flags&STBIR_FLAG_ALPHA_PREMULTIPLIED)
282 //     * Filter can be selected explicitly
283 //     * uint16 image type
284 //     * sRGB colorspace available for all types
285 //     * context parameter for passing to STBIR_MALLOC
286
// Resampling filter selector for the medium- and full-complexity APIs.
typedef enum
{
    STBIR_FILTER_DEFAULT      = 0,  // use same filter type that easy-to-use API chooses
    STBIR_FILTER_BOX          = 1,  // A trapezoid w/1-pixel wide ramps, same result as box for integer scale ratios
    STBIR_FILTER_TRIANGLE     = 2,  // On upsampling, produces same results as bilinear texture filtering
    STBIR_FILTER_CUBICBSPLINE = 3,  // The cubic b-spline (aka Mitchell-Netravali with B=1,C=0), gaussian-esque
    STBIR_FILTER_CATMULLROM   = 4,  // An interpolating cubic spline
    STBIR_FILTER_MITCHELL     = 5   // Mitchell-Netravali filter with B=1/3, C=1/3 (no trailing comma: C89 / C++98 -pedantic)
} stbir_filter;
296
// Colorspace selector. STBIR_MAX_COLORSPACES is a count sentinel, not a
// colorspace; it must stay last.
typedef enum
{
    STBIR_COLORSPACE_LINEAR,
    STBIR_COLORSPACE_SRGB,

    STBIR_MAX_COLORSPACES   // no trailing comma: C89 / C++98 -pedantic
} stbir_colorspace;
304
305 // The following functions are all identical except for the type of the image data
306
307 STBIRDEF int stbir_resize_uint8_generic( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
308                                                unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
309                                          int num_channels, int alpha_channel, int flags,
310                                          stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
311                                          void *alloc_context);
312
313 STBIRDEF int stbir_resize_uint16_generic(const stbir_uint16 *input_pixels  , int input_w , int input_h , int input_stride_in_bytes,
314                                                stbir_uint16 *output_pixels , int output_w, int output_h, int output_stride_in_bytes,
315                                          int num_channels, int alpha_channel, int flags,
316                                          stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
317                                          void *alloc_context);
318
319 STBIRDEF int stbir_resize_float_generic( const float *input_pixels         , int input_w , int input_h , int input_stride_in_bytes,
320                                                float *output_pixels        , int output_w, int output_h, int output_stride_in_bytes,
321                                          int num_channels, int alpha_channel, int flags,
322                                          stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
323                                          void *alloc_context);
324
325
326
327 //////////////////////////////////////////////////////////////////////////////
328 //
329 // Full-complexity API
330 //
331 // This extends the medium API as follows:
332 //
333 //     * uint32 image type
334 //     * not typesafe
335 //     * separate filter types for each axis
336 //     * separate edge modes for each axis
337 //     * can specify scale explicitly for subpixel correctness
338 //     * can specify image source tile using texture coordinates
339
// Sample storage type of the pixel buffers handed to the full-complexity API.
// The numeric values are spelled out because the implementation keeps a
// per-type size table that must stay in this order.
typedef enum
{
    STBIR_TYPE_UINT8  = 0,
    STBIR_TYPE_UINT16 = 1,
    STBIR_TYPE_UINT32 = 2,
    STBIR_TYPE_FLOAT  = 3,

    STBIR_MAX_TYPES   = 4   // count of the types above
} stbir_datatype;
349
350 STBIRDEF int stbir_resize(         const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
351                                          void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
352                                    stbir_datatype datatype,
353                                    int num_channels, int alpha_channel, int flags,
354                                    stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
355                                    stbir_filter filter_horizontal,  stbir_filter filter_vertical,
356                                    stbir_colorspace space, void *alloc_context);
357
358 STBIRDEF int stbir_resize_subpixel(const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
359                                          void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
360                                    stbir_datatype datatype,
361                                    int num_channels, int alpha_channel, int flags,
362                                    stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
363                                    stbir_filter filter_horizontal,  stbir_filter filter_vertical,
364                                    stbir_colorspace space, void *alloc_context,
365                                    float x_scale, float y_scale,
366                                    float x_offset, float y_offset);
367
368 STBIRDEF int stbir_resize_region(  const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
369                                          void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
370                                    stbir_datatype datatype,
371                                    int num_channels, int alpha_channel, int flags,
372                                    stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
373                                    stbir_filter filter_horizontal,  stbir_filter filter_vertical,
374                                    stbir_colorspace space, void *alloc_context,
375                                    float s0, float t0, float s1, float t1);
376 // (s0, t0) & (s1, t1) are the top-left and bottom right corner (uv addressing style: [0, 1]x[0, 1]) of a region of the input image to use.
377
378 //
379 //
380 ////   end header file   /////////////////////////////////////////////////////
381 #endif // STBIR_INCLUDE_STB_IMAGE_RESIZE_H
382
383
384
385
386
387 #ifdef STB_IMAGE_RESIZE_IMPLEMENTATION
388
389 #ifndef STBIR_ASSERT
390 #include <assert.h>
391 #define STBIR_ASSERT(x) assert(x)
392 #endif
393
394 // For memset
395 #include <string.h>
396
397 #include <math.h>
398
399 #ifndef STBIR_MALLOC
400 #include <stdlib.h>
401 // use comma operator to evaluate c, to avoid "unused parameter" warnings
402 #define STBIR_MALLOC(size,c) ((void)(c), malloc(size))
403 #define STBIR_FREE(ptr,c)    ((void)(c), free(ptr))
404 #endif
405
406 #ifndef _MSC_VER
407 #ifdef __cplusplus
408 #define stbir__inline inline
409 #else
410 #define stbir__inline
411 #endif
412 #else
413 #define stbir__inline __forceinline
414 #endif
415
416
417 // should produce compiler error if size is wrong
418 typedef unsigned char stbir__validate_uint32[sizeof(stbir_uint32) == 4 ? 1 : -1];
419
420 #ifdef _MSC_VER
421 #define STBIR__NOTUSED(v)  (void)(v)
422 #else
423 #define STBIR__NOTUSED(v)  (void)sizeof(v)
424 #endif
425
426 #define STBIR__ARRAY_SIZE(a) (sizeof((a))/sizeof((a)[0]))
427
428 #ifndef STBIR_DEFAULT_FILTER_UPSAMPLE
429 #define STBIR_DEFAULT_FILTER_UPSAMPLE    STBIR_FILTER_CATMULLROM
430 #endif
431
432 #ifndef STBIR_DEFAULT_FILTER_DOWNSAMPLE
433 #define STBIR_DEFAULT_FILTER_DOWNSAMPLE  STBIR_FILTER_MITCHELL
434 #endif
435
436 #ifndef STBIR_PROGRESS_REPORT
437 #define STBIR_PROGRESS_REPORT(float_0_to_1)
438 #endif
439
440 #ifndef STBIR_MAX_CHANNELS
441 #define STBIR_MAX_CHANNELS 64
442 #endif
443
444 #if STBIR_MAX_CHANNELS > 65536
445 #error "Too many channels; STBIR_MAX_CHANNELS must be no more than 65536."
446 // because we store the indices in 16-bit variables
447 #endif
448
449 // This value is added to alpha just before premultiplication to avoid
450 // zeroing out color values. It is equivalent to 2^-80. If you don't want
451 // that behavior (it may interfere if you have floating point images with
452 // very small alpha values) then you can define STBIR_NO_ALPHA_EPSILON to
453 // disable it.
454 #ifndef STBIR_ALPHA_EPSILON
455 #define STBIR_ALPHA_EPSILON ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20))
456 #endif
457
458
459
460 #ifdef _MSC_VER
461 #define STBIR__UNUSED_PARAM(v)  (void)(v)
462 #else
463 #define STBIR__UNUSED_PARAM(v)  (void)sizeof(v)
464 #endif
465
// Size in bytes of one sample of each stbir_datatype, indexed by the enum value.
// Must match stbir_datatype (same order, one entry per type).
// Declared const: the table is only ever read.
static const unsigned char stbir__type_size[] = {
    1, // STBIR_TYPE_UINT8
    2, // STBIR_TYPE_UINT16
    4, // STBIR_TYPE_UINT32
    4, // STBIR_TYPE_FLOAT
};
473
// Signature of a filter kernel; the kernel function is centered at 0.
typedef float stbir__kernel_fn(float x, float scale);

// Signature of the function paired with a kernel as its "support".
typedef float stbir__support_fn(float scale);

// One filter description: a kernel and its matching support function.
typedef struct
{
    stbir__kernel_fn  *kernel;
    stbir__support_fn *support;
} stbir__filter_info;
483
// Inclusive pixel range [n0, n1].
// When upsampling, the range names the source pixels that contribute.
// When downsampling, it names the destination pixels that are contributed to.
typedef struct
{
    int n0;   // first pixel of the range
    int n1;   // last pixel of the range
} stbir__contributors;
491
// Aggregate of the parameters, precomputed tables and scratch-buffer pointers
// for a resize.
// NOTE(review): the code that fills and consumes this struct is outside this
// view; the added field notes are read off field names and the public API
// comments and should be confirmed against the implementation.
492 typedef struct
493 {
494     const void* input_data;
495     int input_w;
496     int input_h;
497     int input_stride_bytes; // NOTE(review): not visible here whether a caller-supplied stride of 0 has already been resolved
498
499     void* output_data;
500     int output_w;
501     int output_h;
502     int output_stride_bytes;
503
504     float s0, t0, s1, t1; // presumably the source region corners taken by stbir_resize_region — TODO confirm
505
506     float horizontal_shift; // Units: output pixels
507     float vertical_shift;   // Units: output pixels
508     float horizontal_scale;
509     float vertical_scale;
510
511     int channels;
512     int alpha_channel;
513     stbir_uint32 flags; // presumably a mask of the STBIR_FLAG_* bits — TODO confirm
514     stbir_datatype type;
515     stbir_filter horizontal_filter;
516     stbir_filter vertical_filter;
517     stbir_edge edge_horizontal;
518     stbir_edge edge_vertical;
519     stbir_colorspace colorspace;
520
521     stbir__contributors* horizontal_contributors;
522     float* horizontal_coefficients;
523
524     stbir__contributors* vertical_contributors;
525     float* vertical_coefficients;
526
527     int decode_buffer_pixels;
528     float* decode_buffer;
529
530     float* horizontal_buffer;
531
532     // cache these because ceil/floor are inexplicably showing up in profile
533     int horizontal_coefficient_width;
534     int vertical_coefficient_width;
535     int horizontal_filter_pixel_width;
536     int vertical_filter_pixel_width;
537     int horizontal_filter_pixel_margin;
538     int vertical_filter_pixel_margin;
539     int horizontal_num_contributors;
540     int vertical_num_contributors;
541
542     int ring_buffer_length_bytes;   // The length of an individual entry in the ring buffer. The total number of ring buffers is stbir__get_filter_pixel_width(filter)
543     int ring_buffer_num_entries;    // Total number of entries in the ring buffer.
544     int ring_buffer_first_scanline;
545     int ring_buffer_last_scanline;
546     int ring_buffer_begin_index;    // first_scanline is at this index in the ring buffer
547     float* ring_buffer;
548
549     float* encode_buffer; // A temporary buffer to store floats so we don't lose precision while we do multiply-adds.
550
    // NOTE(review): presumably the sizes of the like-named buffers above; units not visible in this chunk — confirm.
551     int horizontal_contributors_size;
552     int horizontal_coefficients_size;
553     int vertical_contributors_size;
554     int vertical_coefficients_size;
555     int decode_buffer_size;
556     int horizontal_buffer_size;
557     int ring_buffer_size;
558     int encode_buffer_size;
559 } stbir__info;
560
561
// Maximum value of each unsigned integer sample type, stored as a floating-point constant.
562 static const float stbir__max_uint8_as_float  = 255.0f;
563 static const float stbir__max_uint16_as_float = 65535.0f;
// Typed double despite the "_as_float" name: 4294967295 is not exactly representable in a 32-bit IEEE float.
564 static const double stbir__max_uint32_as_float = 4294967295.0;
565
566
567 static stbir__inline int stbir__min(int a, int b)
568 {
569     return a < b ? a : b;
570 }
571
572 static stbir__inline float stbir__saturate(float x)
573 {
574     if (x < 0)
575         return 0;
576
577     if (x > 1)
578         return 1;
579
580     return x;
581 }
582
#ifdef STBIR_SATURATE_INT
// Integer clamps, compiled only when STBIR_SATURATE_INT is defined.

// Clamps x to [0, 255].
static stbir__inline stbir_uint8 stbir__saturate8(int x)
{
    if (x < 0)
        return 0;

    if (x > 255)
        return 255;

    return (stbir_uint8) x;
}

// Clamps x to [0, 65535].
static stbir__inline stbir_uint16 stbir__saturate16(int x)
{
    if (x < 0)
        return 0;

    if (x > 65535)
        return 65535;

    return (stbir_uint16) x;
}
#endif
606
// Lookup table with one float per 8-bit code value (256 entries), rising
// from 0.0 at index 0 to 1.0 at index 255. Per its name it maps an sRGB-encoded
// byte to the corresponding linear value.
// Declared const: the table is only ever read.
static const float stbir__srgb_uchar_to_linear_float[256] = {
    0.000000f, 0.000304f, 0.000607f, 0.000911f, 0.001214f, 0.001518f, 0.001821f, 0.002125f, 0.002428f, 0.002732f, 0.003035f,
    0.003347f, 0.003677f, 0.004025f, 0.004391f, 0.004777f, 0.005182f, 0.005605f, 0.006049f, 0.006512f, 0.006995f, 0.007499f,
    0.008023f, 0.008568f, 0.009134f, 0.009721f, 0.010330f, 0.010960f, 0.011612f, 0.012286f, 0.012983f, 0.013702f, 0.014444f,
    0.015209f, 0.015996f, 0.016807f, 0.017642f, 0.018500f, 0.019382f, 0.020289f, 0.021219f, 0.022174f, 0.023153f, 0.024158f,
    0.025187f, 0.026241f, 0.027321f, 0.028426f, 0.029557f, 0.030713f, 0.031896f, 0.033105f, 0.034340f, 0.035601f, 0.036889f,
    0.038204f, 0.039546f, 0.040915f, 0.042311f, 0.043735f, 0.045186f, 0.046665f, 0.048172f, 0.049707f, 0.051269f, 0.052861f,
    0.054480f, 0.056128f, 0.057805f, 0.059511f, 0.061246f, 0.063010f, 0.064803f, 0.066626f, 0.068478f, 0.070360f, 0.072272f,
    0.074214f, 0.076185f, 0.078187f, 0.080220f, 0.082283f, 0.084376f, 0.086500f, 0.088656f, 0.090842f, 0.093059f, 0.095307f,
    0.097587f, 0.099899f, 0.102242f, 0.104616f, 0.107023f, 0.109462f, 0.111932f, 0.114435f, 0.116971f, 0.119538f, 0.122139f,
    0.124772f, 0.127438f, 0.130136f, 0.132868f, 0.135633f, 0.138432f, 0.141263f, 0.144128f, 0.147027f, 0.149960f, 0.152926f,
    0.155926f, 0.158961f, 0.162029f, 0.165132f, 0.168269f, 0.171441f, 0.174647f, 0.177888f, 0.181164f, 0.184475f, 0.187821f,
    0.191202f, 0.194618f, 0.198069f, 0.201556f, 0.205079f, 0.208637f, 0.212231f, 0.215861f, 0.219526f, 0.223228f, 0.226966f,
    0.230740f, 0.234551f, 0.238398f, 0.242281f, 0.246201f, 0.250158f, 0.254152f, 0.258183f, 0.262251f, 0.266356f, 0.270498f,
    0.274677f, 0.278894f, 0.283149f, 0.287441f, 0.291771f, 0.296138f, 0.300544f, 0.304987f, 0.309469f, 0.313989f, 0.318547f,
    0.323143f, 0.327778f, 0.332452f, 0.337164f, 0.341914f, 0.346704f, 0.351533f, 0.356400f, 0.361307f, 0.366253f, 0.371238f,
    0.376262f, 0.381326f, 0.386430f, 0.391573f, 0.396755f, 0.401978f, 0.407240f, 0.412543f, 0.417885f, 0.423268f, 0.428691f,
    0.434154f, 0.439657f, 0.445201f, 0.450786f, 0.456411f, 0.462077f, 0.467784f, 0.473532f, 0.479320f, 0.485150f, 0.491021f,
    0.496933f, 0.502887f, 0.508881f, 0.514918f, 0.520996f, 0.527115f, 0.533276f, 0.539480f, 0.545725f, 0.552011f, 0.558340f,
    0.564712f, 0.571125f, 0.577581f, 0.584078f, 0.590619f, 0.597202f, 0.603827f, 0.610496f, 0.617207f, 0.623960f, 0.630757f,
    0.637597f, 0.644480f, 0.651406f, 0.658375f, 0.665387f, 0.672443f, 0.679543f, 0.686685f, 0.693872f, 0.701102f, 0.708376f,
    0.715694f, 0.723055f, 0.730461f, 0.737911f, 0.745404f, 0.752942f, 0.760525f, 0.768151f, 0.775822f, 0.783538f, 0.791298f,
    0.799103f, 0.806952f, 0.814847f, 0.822786f, 0.830770f, 0.838799f, 0.846873f, 0.854993f, 0.863157f, 0.871367f, 0.879622f,
    0.887923f, 0.896269f, 0.904661f, 0.913099f, 0.921582f, 0.930111f, 0.938686f, 0.947307f, 0.955974f, 0.964686f, 0.973445f,
    0.982251f, 0.991102f, 1.0f
};
633
634 static float stbir__srgb_to_linear(float f)
635 {
636     if (f <= 0.04045f)
637         return f / 12.92f;
638     else
639         return (float)pow((f + 0.055f) / 1.055f, 2.4f);
640 }
641
642 static float stbir__linear_to_srgb(float f)
643 {
644     if (f <= 0.0031308f)
645         return f * 12.92f;
646     else
647         return 1.055f * (float)pow(f, 1 / 2.4f) - 0.055f;
648 }
649
650 #ifndef STBIR_NON_IEEE_FLOAT
651 // From https://gist.github.com/rygorous/2203834
652
// Overlays a float with a 32-bit unsigned integer so that the float's bit
// pattern can be read and written as an integer. Only compiled when
// STBIR_NON_IEEE_FLOAT is not defined.
653 typedef union
654 {
655     stbir_uint32 u; // the bit pattern, viewed as an integer
656     float f;        // the same storage, viewed as a float
657 } stbir__FP32;
658
// Lookup table read by stbir__linear_to_srgb_uchar. Each 32-bit entry packs
// two 16-bit fields: the upper half is used as a bias and the lower half as a
// scale. The entry is selected by (float bits - bits of 2^-13) >> 20.
659 static const stbir_uint32 fp32_to_srgb8_tab4[104] = {
660     0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d, 0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d,
661     0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a, 0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a,
662     0x010e0033, 0x01280033, 0x01410033, 0x015b0033, 0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,
663     0x01dc0067, 0x020f0067, 0x02430067, 0x02760067, 0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067,
664     0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce, 0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5,
665     0x06970158, 0x07420142, 0x07e30130, 0x087b0120, 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
666     0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, 0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143,
667     0x11070264, 0x1238023e, 0x1357021d, 0x14660201, 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af,
668     0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad, 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,
669     0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392, 0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300,
670     0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5, 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401,
671     0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d, 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,
672     0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, 0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723,
673 };
674
675 static stbir_uint8 stbir__linear_to_srgb_uchar(float in)
676 {
677     static const stbir__FP32 almostone = { 0x3f7fffff }; // 1-eps
678     static const stbir__FP32 minval = { (127-13) << 23 };
679     stbir_uint32 tab,bias,scale,t;
680     stbir__FP32 f;
681
682     // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
683     // The tests are carefully written so that NaNs map to 0, same as in the reference
684     // implementation.
685     if (!(in > minval.f)) // written this way to catch NaNs
686         in = minval.f;
687     if (in > almostone.f)
688         in = almostone.f;
689
690     // Do the table lookup and unpack bias, scale
691     f.f = in;
692     tab = fp32_to_srgb8_tab4[(f.u - minval.u) >> 20];
693     bias = (tab >> 16) << 9;
694     scale = tab & 0xffff;
695
696     // Grab next-highest mantissa bits and perform linear interpolation
697     t = (f.u >> 12) & 0xff;
698     return (unsigned char) ((bias + scale*t) >> 16);
699 }
700
701 #else
// sRGB transition values, scaled by 1<<28.
// Entry i is the smallest scaled linear value that maps to sRGB code i.
// The table is only ever read, so it is declared const.
static const int stbir__srgb_offset_to_linear_scaled[256] =
{
            0,     40738,    122216,    203693,    285170,    366648,    448125,    529603,
       611080,    692557,    774035,    855852,    942009,   1033024,   1128971,   1229926,
      1335959,   1447142,   1563542,   1685229,   1812268,   1944725,   2082664,   2226148,
      2375238,   2529996,   2690481,   2856753,   3028870,   3206888,   3390865,   3580856,
      3776916,   3979100,   4187460,   4402049,   4622919,   4850123,   5083710,   5323731,
      5570236,   5823273,   6082892,   6349140,   6622065,   6901714,   7188133,   7481369,
      7781466,   8088471,   8402427,   8723380,   9051372,   9386448,   9728650,  10078021,
     10434603,  10798439,  11169569,  11548036,  11933879,  12327139,  12727857,  13136073,
     13551826,  13975156,  14406100,  14844697,  15290987,  15745007,  16206795,  16676389,
     17153826,  17639142,  18132374,  18633560,  19142734,  19659934,  20185196,  20718552,
     21260042,  21809696,  22367554,  22933648,  23508010,  24090680,  24681686,  25281066,
     25888850,  26505076,  27129772,  27762974,  28404716,  29055026,  29713942,  30381490,
     31057708,  31742624,  32436272,  33138682,  33849884,  34569912,  35298800,  36036568,
     36783260,  37538896,  38303512,  39077136,  39859796,  40651528,  41452360,  42262316,
     43081432,  43909732,  44747252,  45594016,  46450052,  47315392,  48190064,  49074096,
     49967516,  50870356,  51782636,  52704392,  53635648,  54576432,  55526772,  56486700,
     57456236,  58435408,  59424248,  60422780,  61431036,  62449032,  63476804,  64514376,
     65561776,  66619028,  67686160,  68763192,  69850160,  70947088,  72053992,  73170912,
     74297864,  75434880,  76581976,  77739184,  78906536,  80084040,  81271736,  82469648,
     83677792,  84896192,  86124888,  87363888,  88613232,  89872928,  91143016,  92423512,
     93714432,  95015816,  96327688,  97650056,  98982952, 100326408, 101680440, 103045072,
    104420320, 105806224, 107202800, 108610064, 110028048, 111456776, 112896264, 114346544,
    115807632, 117279552, 118762328, 120255976, 121760536, 123276016, 124802440, 126339832,
    127888216, 129447616, 131018048, 132599544, 134192112, 135795792, 137410592, 139036528,
    140673648, 142321952, 143981456, 145652208, 147334208, 149027488, 150732064, 152447968,
    154175200, 155913792, 157663776, 159425168, 161197984, 162982240, 164777968, 166585184,
    168403904, 170234160, 172075968, 173929344, 175794320, 177670896, 179559120, 181458992,
    183370528, 185293776, 187228736, 189175424, 191133888, 193104112, 195086128, 197079968,
    199085648, 201103184, 203132592, 205173888, 207227120, 209292272, 211369392, 213458480,
    215559568, 217672656, 219797792, 221934976, 224084240, 226245600, 228419056, 230604656,
    232802400, 235012320, 237234432, 239468736, 241715280, 243974080, 246245120, 248528464,
    250824112, 253132064, 255452368, 257785040, 260130080, 262487520, 264857376, 267239664,
};
738
739 static stbir_uint8 stbir__linear_to_srgb_uchar(float f)
740 {
741     int x = (int) (f * (1 << 28)); // has headroom so you don't need to clamp
742     int v = 0;
743     int i;
744
745     // Refine the guess with a short binary search.
746     i = v + 128; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
747     i = v +  64; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
748     i = v +  32; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
749     i = v +  16; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
750     i = v +   8; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
751     i = v +   4; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
752     i = v +   2; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
753     i = v +   1; if (x >= stbir__srgb_offset_to_linear_scaled[i]) v = i;
754
755     return (stbir_uint8) v;
756 }
757 #endif
758
// Trapezoid kernel: 1 on the flat top (|x| <= 0.5 - scale/2), 0 outside
// |x| >= 0.5 + scale/2, and a linear ramp in between.
static float stbir__filter_trapezoid(float x, float scale)
{
    float halfscale = scale / 2;
    float outer = 0.5f + halfscale; // where the kernel reaches zero
    float inner = 0.5f - halfscale; // where the flat top ends
    STBIR_ASSERT(scale <= 1);

    x = (float)fabs(x);

    if (x >= outer)
        return 0;

    if (x <= inner)
        return 1;

    return (outer - x) / scale;
}
778
// Support radius of the trapezoid kernel for the given scale (scale <= 1).
static float stbir__support_trapezoid(float scale)
{
    float halfscale = scale / 2;
    STBIR_ASSERT(scale <= 1);
    return 0.5f + halfscale;
}
784
// Triangle (tent) kernel: 1 - |x| for |x| <= 1, otherwise 0.
// The second parameter is unused.
static float stbir__filter_triangle(float x, float s)
{
    float ax;

    STBIR__UNUSED_PARAM(s);

    ax = (float)fabs(x);

    return (ax <= 1.0f) ? 1 - ax : 0;
}
796
// Piecewise cubic kernel with one polynomial on |x| < 1, another on
// 1 <= |x| < 2, and 0 elsewhere. The second parameter is unused.
static float stbir__filter_cubic(float x, float s)
{
    STBIR__UNUSED_PARAM(s);

    x = (float)fabs(x);

    if (x < 1.0f)
    {
        return (4 + x*x*(3*x - 6))/6;
    }

    if (x < 2.0f)
    {
        return (8 + x*(-12 + x*(6 - x)))/6;
    }

    return 0.0f;
}
810
// Catmull-Rom kernel with one polynomial on |x| < 1, another on
// 1 <= |x| < 2, and 0 elsewhere. The second parameter is unused.
static float stbir__filter_catmullrom(float x, float s)
{
    STBIR__UNUSED_PARAM(s);

    x = (float)fabs(x);

    if (x < 1.0f)
    {
        return 1 - x*x*(2.5f - 1.5f*x);
    }

    if (x < 2.0f)
    {
        return 2 - x*(4 + x*(0.5f*x - 2.5f));
    }

    return 0.0f;
}
824
// Mitchell kernel with one polynomial on |x| < 1, another on
// 1 <= |x| < 2, and 0 elsewhere. The second parameter is unused.
static float stbir__filter_mitchell(float x, float s)
{
    STBIR__UNUSED_PARAM(s);

    x = (float)fabs(x);

    if (x < 1.0f)
    {
        return (16 + x*x*(21 * x - 36))/18;
    }

    if (x < 2.0f)
    {
        return (32 + x*(-60 + x*(36 - 7*x)))/18;
    }

    return 0.0f;
}
838
// Support callback that always reports a radius of 0; the argument is ignored.
static float stbir__support_zero(float s)
{
    STBIR__UNUSED_PARAM(s);
    return 0.0f;
}
844
// Support callback that always reports a radius of 1; the argument is ignored.
static float stbir__support_one(float s)
{
    STBIR__UNUSED_PARAM(s);
    return 1.0f;
}
850
// Support callback that always reports a radius of 2; the argument is ignored.
static float stbir__support_two(float s)
{
    STBIR__UNUSED_PARAM(s);
    return 2.0f;
}
856
857 static stbir__filter_info stbir__filter_info_table[] = {
858         { NULL,                     stbir__support_zero },
859         { stbir__filter_trapezoid,  stbir__support_trapezoid },
860         { stbir__filter_triangle,   stbir__support_one },
861         { stbir__filter_cubic,      stbir__support_two },
862         { stbir__filter_catmullrom, stbir__support_two },
863         { stbir__filter_mitchell,   stbir__support_two },
864 };
865
866 stbir__inline static int stbir__use_upsampling(float ratio)
867 {
868     return ratio > 1;
869 }
870
871 stbir__inline static int stbir__use_width_upsampling(stbir__info* stbir_info)
872 {
873     return stbir__use_upsampling(stbir_info->horizontal_scale);
874 }
875
876 stbir__inline static int stbir__use_height_upsampling(stbir__info* stbir_info)
877 {
878     return stbir__use_upsampling(stbir_info->vertical_scale);
879 }
880
881 // This is the maximum number of input samples that can affect an output sample
882 // with the given filter
883 static int stbir__get_filter_pixel_width(stbir_filter filter, float scale)
884 {
885     STBIR_ASSERT(filter != 0);
886     STBIR_ASSERT(filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
887
888     if (stbir__use_upsampling(scale))
889         return (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2);
890     else
891         return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2 / scale);
892 }
893
894 // This is how much to expand buffers to account for filters seeking outside
895 // the image boundaries.
896 static int stbir__get_filter_pixel_margin(stbir_filter filter, float scale)
897 {
898     return stbir__get_filter_pixel_width(filter, scale) / 2;
899 }
900
901 static int stbir__get_coefficient_width(stbir_filter filter, float scale)
902 {
903     if (stbir__use_upsampling(scale))
904         return (int)ceil(stbir__filter_info_table[filter].support(1 / scale) * 2);
905     else
906         return (int)ceil(stbir__filter_info_table[filter].support(scale) * 2);
907 }
908
909 static int stbir__get_contributors(float scale, stbir_filter filter, int input_size, int output_size)
910 {
911     if (stbir__use_upsampling(scale))
912         return output_size;
913     else
914         return (input_size + stbir__get_filter_pixel_margin(filter, scale) * 2);
915 }
916
917 static int stbir__get_total_horizontal_coefficients(stbir__info* info)
918 {
919     return info->horizontal_num_contributors
920          * stbir__get_coefficient_width      (info->horizontal_filter, info->horizontal_scale);
921 }
922
923 static int stbir__get_total_vertical_coefficients(stbir__info* info)
924 {
925     return info->vertical_num_contributors
926          * stbir__get_coefficient_width      (info->vertical_filter, info->vertical_scale);
927 }
928
929 static stbir__contributors* stbir__get_contributor(stbir__contributors* contributors, int n)
930 {
931     return &contributors[n];
932 }
933
934 // For perf reasons this code is duplicated in stbir__resample_horizontal_upsample/downsample,
935 // if you change it here change it there too.
936 static float* stbir__get_coefficient(float* coefficients, stbir_filter filter, float scale, int n, int c)
937 {
938     int width = stbir__get_coefficient_width(filter, scale);
939     return &coefficients[width*n + c];
940 }
941
942 static int stbir__edge_wrap_slow(stbir_edge edge, int n, int max)
943 {
944     switch (edge)
945     {
946     case STBIR_EDGE_ZERO:
947         return 0; // we'll decode the wrong pixel here, and then overwrite with 0s later
948
949     case STBIR_EDGE_CLAMP:
950         if (n < 0)
951             return 0;
952
953         if (n >= max)
954             return max - 1;
955
956         return n; // NOTREACHED
957
958     case STBIR_EDGE_REFLECT:
959     {
960         if (n < 0)
961         {
962             if (n < max)
963                 return -n;
964             else
965                 return max - 1;
966         }
967
968         if (n >= max)
969         {
970             int max2 = max * 2;
971             if (n >= max2)
972                 return 0;
973             else
974                 return max2 - n - 1;
975         }
976
977         return n; // NOTREACHED
978     }
979
980     case STBIR_EDGE_WRAP:
981         if (n >= 0)
982             return (n % max);
983         else
984         {
985             int m = (-n) % max;
986
987             if (m != 0)
988                 m = max - m;
989
990             return (m);
991         }
992         // NOTREACHED
993
994     default:
995         STBIR_ASSERT(!"Unimplemented edge type");
996         return 0;
997     }
998 }
999
1000 stbir__inline static int stbir__edge_wrap(stbir_edge edge, int n, int max)
1001 {
1002     // avoid per-pixel switch
1003     if (n >= 0 && n < max)
1004         return n;
1005     return stbir__edge_wrap_slow(edge, n, max);
1006 }
1007
1008 // What input pixels contribute to this output pixel?
1009 static void stbir__calculate_sample_range_upsample(int n, float out_filter_radius, float scale_ratio, float out_shift, int* in_first_pixel, int* in_last_pixel, float* in_center_of_out)
1010 {
1011     float out_pixel_center = (float)n + 0.5f;
1012     float out_pixel_influence_lowerbound = out_pixel_center - out_filter_radius;
1013     float out_pixel_influence_upperbound = out_pixel_center + out_filter_radius;
1014
1015     float in_pixel_influence_lowerbound = (out_pixel_influence_lowerbound + out_shift) / scale_ratio;
1016     float in_pixel_influence_upperbound = (out_pixel_influence_upperbound + out_shift) / scale_ratio;
1017
1018     *in_center_of_out = (out_pixel_center + out_shift) / scale_ratio;
1019     *in_first_pixel = (int)(floor(in_pixel_influence_lowerbound + 0.5));
1020     *in_last_pixel = (int)(floor(in_pixel_influence_upperbound - 0.5));
1021 }
1022
1023 // What output pixels does this input pixel contribute to?
1024 static void stbir__calculate_sample_range_downsample(int n, float in_pixels_radius, float scale_ratio, float out_shift, int* out_first_pixel, int* out_last_pixel, float* out_center_of_in)
1025 {
1026     float in_pixel_center = (float)n + 0.5f;
1027     float in_pixel_influence_lowerbound = in_pixel_center - in_pixels_radius;
1028     float in_pixel_influence_upperbound = in_pixel_center + in_pixels_radius;
1029
1030     float out_pixel_influence_lowerbound = in_pixel_influence_lowerbound * scale_ratio - out_shift;
1031     float out_pixel_influence_upperbound = in_pixel_influence_upperbound * scale_ratio - out_shift;
1032
1033     *out_center_of_in = in_pixel_center * scale_ratio - out_shift;
1034     *out_first_pixel = (int)(floor(out_pixel_influence_lowerbound + 0.5));
1035     *out_last_pixel = (int)(floor(out_pixel_influence_upperbound - 0.5));
1036 }
1037
1038 static void stbir__calculate_coefficients_upsample(stbir_filter filter, float scale, int in_first_pixel, int in_last_pixel, float in_center_of_out, stbir__contributors* contributor, float* coefficient_group)
1039 {
1040     int i;
1041     float total_filter = 0;
1042     float filter_scale;
1043
1044     STBIR_ASSERT(in_last_pixel - in_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(1/scale) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical.
1045
1046     contributor->n0 = in_first_pixel;
1047     contributor->n1 = in_last_pixel;
1048
1049     STBIR_ASSERT(contributor->n1 >= contributor->n0);
1050
1051     for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
1052     {
1053         float in_pixel_center = (float)(i + in_first_pixel) + 0.5f;
1054         coefficient_group[i] = stbir__filter_info_table[filter].kernel(in_center_of_out - in_pixel_center, 1 / scale);
1055
1056         // If the coefficient is zero, skip it. (Don't do the <0 check here, we want the influence of those outside pixels.)
1057         if (i == 0 && !coefficient_group[i])
1058         {
1059             contributor->n0 = ++in_first_pixel;
1060             i--;
1061             continue;
1062         }
1063
1064         total_filter += coefficient_group[i];
1065     }
1066
1067     STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(in_last_pixel + 1) + 0.5f - in_center_of_out, 1/scale) == 0);
1068
1069     STBIR_ASSERT(total_filter > 0.9);
1070     STBIR_ASSERT(total_filter < 1.1f); // Make sure it's not way off.
1071
1072     // Make sure the sum of all coefficients is 1.
1073     filter_scale = 1 / total_filter;
1074
1075     for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
1076         coefficient_group[i] *= filter_scale;
1077
1078     for (i = in_last_pixel - in_first_pixel; i >= 0; i--)
1079     {
1080         if (coefficient_group[i])
1081             break;
1082
1083         // This line has no weight. We can skip it.
1084         contributor->n1 = contributor->n0 + i - 1;
1085     }
1086 }
1087
1088 static void stbir__calculate_coefficients_downsample(stbir_filter filter, float scale_ratio, int out_first_pixel, int out_last_pixel, float out_center_of_in, stbir__contributors* contributor, float* coefficient_group)
1089 {
1090     int i;
1091
1092      STBIR_ASSERT(out_last_pixel - out_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(scale_ratio) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical.
1093
1094     contributor->n0 = out_first_pixel;
1095     contributor->n1 = out_last_pixel;
1096
1097     STBIR_ASSERT(contributor->n1 >= contributor->n0);
1098
1099     for (i = 0; i <= out_last_pixel - out_first_pixel; i++)
1100     {
1101         float out_pixel_center = (float)(i + out_first_pixel) + 0.5f;
1102         float x = out_pixel_center - out_center_of_in;
1103         coefficient_group[i] = stbir__filter_info_table[filter].kernel(x, scale_ratio) * scale_ratio;
1104     }
1105
1106     STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(out_last_pixel + 1) + 0.5f - out_center_of_in, scale_ratio) == 0);
1107
1108     for (i = out_last_pixel - out_first_pixel; i >= 0; i--)
1109     {
1110         if (coefficient_group[i])
1111             break;
1112
1113         // This line has no weight. We can skip it.
1114         contributor->n1 = contributor->n0 + i - 1;
1115     }
1116 }
1117
1118 static void stbir__normalize_downsample_coefficients(stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, int input_size, int output_size)
1119 {
1120     int num_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size);
1121     int num_coefficients = stbir__get_coefficient_width(filter, scale_ratio);
1122     int i, j;
1123     int skip;
1124
1125     for (i = 0; i < output_size; i++)
1126     {
1127         float scale;
1128         float total = 0;
1129
1130         for (j = 0; j < num_contributors; j++)
1131         {
1132             if (i >= contributors[j].n0 && i <= contributors[j].n1)
1133             {
1134                 float coefficient = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0);
1135                 total += coefficient;
1136             }
1137             else if (i < contributors[j].n0)
1138                 break;
1139         }
1140
1141         STBIR_ASSERT(total > 0.9f);
1142         STBIR_ASSERT(total < 1.1f);
1143
1144         scale = 1 / total;
1145
1146         for (j = 0; j < num_contributors; j++)
1147         {
1148             if (i >= contributors[j].n0 && i <= contributors[j].n1)
1149                 *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i - contributors[j].n0) *= scale;
1150             else if (i < contributors[j].n0)
1151                 break;
1152         }
1153     }
1154
1155     // Optimize: Skip zero coefficients and contributions outside of image bounds.
1156     // Do this after normalizing because normalization depends on the n0/n1 values.
1157     for (j = 0; j < num_contributors; j++)
1158     {
1159         int range, max, width;
1160
1161         skip = 0;
1162         while (*stbir__get_coefficient(coefficients, filter, scale_ratio, j, skip) == 0)
1163             skip++;
1164
1165         contributors[j].n0 += skip;
1166
1167         while (contributors[j].n0 < 0)
1168         {
1169             contributors[j].n0++;
1170             skip++;
1171         }
1172
1173         range = contributors[j].n1 - contributors[j].n0 + 1;
1174         max = stbir__min(num_coefficients, range);
1175
1176         width = stbir__get_coefficient_width(filter, scale_ratio);
1177         for (i = 0; i < max; i++)
1178         {
1179             if (i + skip >= width)
1180                 break;
1181
1182             *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i) = *stbir__get_coefficient(coefficients, filter, scale_ratio, j, i + skip);
1183         }
1184
1185         continue;
1186     }
1187
1188     // Using min to avoid writing into invalid pixels.
1189     for (i = 0; i < num_contributors; i++)
1190         contributors[i].n1 = stbir__min(contributors[i].n1, output_size - 1);
1191 }
1192
1193 // Each scan line uses the same kernel values so we should calculate the kernel
1194 // values once and then we can use them for every scan line.
1195 static void stbir__calculate_filters(stbir__contributors* contributors, float* coefficients, stbir_filter filter, float scale_ratio, float shift, int input_size, int output_size)
1196 {
1197     int n;
1198     int total_contributors = stbir__get_contributors(scale_ratio, filter, input_size, output_size);
1199
1200     if (stbir__use_upsampling(scale_ratio))
1201     {
1202         float out_pixels_radius = stbir__filter_info_table[filter].support(1 / scale_ratio) * scale_ratio;
1203
1204         // Looping through out pixels
1205         for (n = 0; n < total_contributors; n++)
1206         {
1207             float in_center_of_out; // Center of the current out pixel in the in pixel space
1208             int in_first_pixel, in_last_pixel;
1209
1210             stbir__calculate_sample_range_upsample(n, out_pixels_radius, scale_ratio, shift, &in_first_pixel, &in_last_pixel, &in_center_of_out);
1211
1212             stbir__calculate_coefficients_upsample(filter, scale_ratio, in_first_pixel, in_last_pixel, in_center_of_out, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0));
1213         }
1214     }
1215     else
1216     {
1217         float in_pixels_radius = stbir__filter_info_table[filter].support(scale_ratio) / scale_ratio;
1218
1219         // Looping through in pixels
1220         for (n = 0; n < total_contributors; n++)
1221         {
1222             float out_center_of_in; // Center of the current out pixel in the in pixel space
1223             int out_first_pixel, out_last_pixel;
1224             int n_adjusted = n - stbir__get_filter_pixel_margin(filter, scale_ratio);
1225
1226             stbir__calculate_sample_range_downsample(n_adjusted, in_pixels_radius, scale_ratio, shift, &out_first_pixel, &out_last_pixel, &out_center_of_in);
1227
1228             stbir__calculate_coefficients_downsample(filter, scale_ratio, out_first_pixel, out_last_pixel, out_center_of_in, stbir__get_contributor(contributors, n), stbir__get_coefficient(coefficients, filter, scale_ratio, n, 0));
1229         }
1230
1231         stbir__normalize_downsample_coefficients(contributors, coefficients, filter, scale_ratio, input_size, output_size);
1232     }
1233 }
1234
1235 static float* stbir__get_decode_buffer(stbir__info* stbir_info)
1236 {
1237     // The 0 index of the decode buffer starts after the margin. This makes
1238     // it okay to use negative indexes on the decode buffer.
1239     return &stbir_info->decode_buffer[stbir_info->horizontal_filter_pixel_margin * stbir_info->channels];
1240 }
1241
1242 #define STBIR__DECODE(type, colorspace) ((int)(type) * (STBIR_MAX_COLORSPACES) + (int)(colorspace))
1243
1244 static void stbir__decode_scanline(stbir__info* stbir_info, int n)
1245 {
1246     int c;
1247     int channels = stbir_info->channels;
1248     int alpha_channel = stbir_info->alpha_channel;
1249     int type = stbir_info->type;
1250     int colorspace = stbir_info->colorspace;
1251     int input_w = stbir_info->input_w;
1252     size_t input_stride_bytes = stbir_info->input_stride_bytes;
1253     float* decode_buffer = stbir__get_decode_buffer(stbir_info);
1254     stbir_edge edge_horizontal = stbir_info->edge_horizontal;
1255     stbir_edge edge_vertical = stbir_info->edge_vertical;
1256     size_t in_buffer_row_offset = stbir__edge_wrap(edge_vertical, n, stbir_info->input_h) * input_stride_bytes;
1257     const void* input_data = (char *) stbir_info->input_data + in_buffer_row_offset;
1258     int max_x = input_w + stbir_info->horizontal_filter_pixel_margin;
1259     int decode = STBIR__DECODE(type, colorspace);
1260
1261     int x = -stbir_info->horizontal_filter_pixel_margin;
1262
1263     // special handling for STBIR_EDGE_ZERO because it needs to return an item that doesn't appear in the input,
1264     // and we want to avoid paying overhead on every pixel if not STBIR_EDGE_ZERO
1265     if (edge_vertical == STBIR_EDGE_ZERO && (n < 0 || n >= stbir_info->input_h))
1266     {
1267         for (; x < max_x; x++)
1268             for (c = 0; c < channels; c++)
1269                 decode_buffer[x*channels + c] = 0;
1270         return;
1271     }
1272
1273     switch (decode)
1274     {
1275     case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
1276         for (; x < max_x; x++)
1277         {
1278             int decode_pixel_index = x * channels;
1279             int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1280             for (c = 0; c < channels; c++)
1281                 decode_buffer[decode_pixel_index + c] = ((float)((const unsigned char*)input_data)[input_pixel_index + c]) / stbir__max_uint8_as_float;
1282         }
1283         break;
1284
1285     case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
1286         for (; x < max_x; x++)
1287         {
1288             int decode_pixel_index = x * channels;
1289             int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1290             for (c = 0; c < channels; c++)
1291                 decode_buffer[decode_pixel_index + c] = stbir__srgb_uchar_to_linear_float[((const unsigned char*)input_data)[input_pixel_index + c]];
1292
1293             if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1294                 decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned char*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint8_as_float;
1295         }
1296         break;
1297
1298     case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
1299         for (; x < max_x; x++)
1300         {
1301             int decode_pixel_index = x * channels;
1302             int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1303             for (c = 0; c < channels; c++)
1304                 decode_buffer[decode_pixel_index + c] = ((float)((const unsigned short*)input_data)[input_pixel_index + c]) / stbir__max_uint16_as_float;
1305         }
1306         break;
1307
1308     case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
1309         for (; x < max_x; x++)
1310         {
1311             int decode_pixel_index = x * channels;
1312             int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1313             for (c = 0; c < channels; c++)
1314                 decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((float)((const unsigned short*)input_data)[input_pixel_index + c]) / stbir__max_uint16_as_float);
1315
1316             if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1317                 decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned short*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint16_as_float;
1318         }
1319         break;
1320
1321     case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
1322         for (; x < max_x; x++)
1323         {
1324             int decode_pixel_index = x * channels;
1325             int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1326             for (c = 0; c < channels; c++)
1327                 decode_buffer[decode_pixel_index + c] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / stbir__max_uint32_as_float);
1328         }
1329         break;
1330
1331     case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
1332         for (; x < max_x; x++)
1333         {
1334             int decode_pixel_index = x * channels;
1335             int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1336             for (c = 0; c < channels; c++)
1337                 decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear((float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / stbir__max_uint32_as_float));
1338
1339             if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1340                 decode_buffer[decode_pixel_index + alpha_channel] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + alpha_channel]) / stbir__max_uint32_as_float);
1341         }
1342         break;
1343
1344     case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
1345         for (; x < max_x; x++)
1346         {
1347             int decode_pixel_index = x * channels;
1348             int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1349             for (c = 0; c < channels; c++)
1350                 decode_buffer[decode_pixel_index + c] = ((const float*)input_data)[input_pixel_index + c];
1351         }
1352         break;
1353
1354     case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
1355         for (; x < max_x; x++)
1356         {
1357             int decode_pixel_index = x * channels;
1358             int input_pixel_index = stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
1359             for (c = 0; c < channels; c++)
1360                 decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((const float*)input_data)[input_pixel_index + c]);
1361
1362             if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1363                 decode_buffer[decode_pixel_index + alpha_channel] = ((const float*)input_data)[input_pixel_index + alpha_channel];
1364         }
1365
1366         break;
1367
1368     default:
1369         STBIR_ASSERT(!"Unknown type/colorspace/channels combination.");
1370         break;
1371     }
1372
1373     if (!(stbir_info->flags & STBIR_FLAG_ALPHA_PREMULTIPLIED))
1374     {
1375         for (x = -stbir_info->horizontal_filter_pixel_margin; x < max_x; x++)
1376         {
1377             int decode_pixel_index = x * channels;
1378
1379             // If the alpha value is 0 it will clobber the color values. Make sure it's not.
1380             float alpha = decode_buffer[decode_pixel_index + alpha_channel];
1381 #ifndef STBIR_NO_ALPHA_EPSILON
1382             if (stbir_info->type != STBIR_TYPE_FLOAT) {
1383                 alpha += STBIR_ALPHA_EPSILON;
1384                 decode_buffer[decode_pixel_index + alpha_channel] = alpha;
1385             }
1386 #endif
1387             for (c = 0; c < channels; c++)
1388             {
1389                 if (c == alpha_channel)
1390                     continue;
1391
1392                 decode_buffer[decode_pixel_index + c] *= alpha;
1393             }
1394         }
1395     }
1396
1397     if (edge_horizontal == STBIR_EDGE_ZERO)
1398     {
1399         for (x = -stbir_info->horizontal_filter_pixel_margin; x < 0; x++)
1400         {
1401             for (c = 0; c < channels; c++)
1402                 decode_buffer[x*channels + c] = 0;
1403         }
1404         for (x = input_w; x < max_x; x++)
1405         {
1406             for (c = 0; c < channels; c++)
1407                 decode_buffer[x*channels + c] = 0;
1408         }
1409     }
1410 }
1411
// Returns a pointer to entry `index` of the ring buffer, where every entry is
// ring_buffer_length floats long.
static float* stbir__get_ring_buffer_entry(float* ring_buffer, int index, int ring_buffer_length)
{
    int first_float = index * ring_buffer_length;
    return ring_buffer + first_float;
}
1416
1417 static float* stbir__add_empty_ring_buffer_entry(stbir__info* stbir_info, int n)
1418 {
1419     int ring_buffer_index;
1420     float* ring_buffer;
1421
1422     stbir_info->ring_buffer_last_scanline = n;
1423
1424     if (stbir_info->ring_buffer_begin_index < 0)
1425     {
1426         ring_buffer_index = stbir_info->ring_buffer_begin_index = 0;
1427         stbir_info->ring_buffer_first_scanline = n;
1428     }
1429     else
1430     {
1431         ring_buffer_index = (stbir_info->ring_buffer_begin_index + (stbir_info->ring_buffer_last_scanline - stbir_info->ring_buffer_first_scanline)) % stbir_info->ring_buffer_num_entries;
1432         STBIR_ASSERT(ring_buffer_index != stbir_info->ring_buffer_begin_index);
1433     }
1434
1435     ring_buffer = stbir__get_ring_buffer_entry(stbir_info->ring_buffer, ring_buffer_index, stbir_info->ring_buffer_length_bytes / sizeof(float));
1436     memset(ring_buffer, 0, stbir_info->ring_buffer_length_bytes);
1437
1438     return ring_buffer;
1439 }
1440
1441
1442 static void stbir__resample_horizontal_upsample(stbir__info* stbir_info, float* output_buffer)
1443 {
1444     int x, k;
1445     int output_w = stbir_info->output_w;
1446     int channels = stbir_info->channels;
1447     float* decode_buffer = stbir__get_decode_buffer(stbir_info);
1448     stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors;
1449     float* horizontal_coefficients = stbir_info->horizontal_coefficients;
1450     int coefficient_width = stbir_info->horizontal_coefficient_width;
1451
1452     for (x = 0; x < output_w; x++)
1453     {
1454         int n0 = horizontal_contributors[x].n0;
1455         int n1 = horizontal_contributors[x].n1;
1456
1457         int out_pixel_index = x * channels;
1458         int coefficient_group = coefficient_width * x;
1459         int coefficient_counter = 0;
1460
1461         STBIR_ASSERT(n1 >= n0);
1462         STBIR_ASSERT(n0 >= -stbir_info->horizontal_filter_pixel_margin);
1463         STBIR_ASSERT(n1 >= -stbir_info->horizontal_filter_pixel_margin);
1464         STBIR_ASSERT(n0 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin);
1465         STBIR_ASSERT(n1 < stbir_info->input_w + stbir_info->horizontal_filter_pixel_margin);
1466
1467         switch (channels) {
1468             case 1:
1469                 for (k = n0; k <= n1; k++)
1470                 {
1471                     int in_pixel_index = k * 1;
1472                     float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1473                     STBIR_ASSERT(coefficient != 0);
1474                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1475                 }
1476                 break;
1477             case 2:
1478                 for (k = n0; k <= n1; k++)
1479                 {
1480                     int in_pixel_index = k * 2;
1481                     float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1482                     STBIR_ASSERT(coefficient != 0);
1483                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1484                     output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1485                 }
1486                 break;
1487             case 3:
1488                 for (k = n0; k <= n1; k++)
1489                 {
1490                     int in_pixel_index = k * 3;
1491                     float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1492                     STBIR_ASSERT(coefficient != 0);
1493                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1494                     output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1495                     output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
1496                 }
1497                 break;
1498             case 4:
1499                 for (k = n0; k <= n1; k++)
1500                 {
1501                     int in_pixel_index = k * 4;
1502                     float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1503                     STBIR_ASSERT(coefficient != 0);
1504                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1505                     output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1506                     output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
1507                     output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient;
1508                 }
1509                 break;
1510             default:
1511                 for (k = n0; k <= n1; k++)
1512                 {
1513                     int in_pixel_index = k * channels;
1514                     float coefficient = horizontal_coefficients[coefficient_group + coefficient_counter++];
1515                     int c;
1516                     STBIR_ASSERT(coefficient != 0);
1517                     for (c = 0; c < channels; c++)
1518                         output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
1519                 }
1520                 break;
1521         }
1522     }
1523 }
1524
1525 static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, float* output_buffer)
1526 {
1527     int x, k;
1528     int input_w = stbir_info->input_w;
1529     int channels = stbir_info->channels;
1530     float* decode_buffer = stbir__get_decode_buffer(stbir_info);
1531     stbir__contributors* horizontal_contributors = stbir_info->horizontal_contributors;
1532     float* horizontal_coefficients = stbir_info->horizontal_coefficients;
1533     int coefficient_width = stbir_info->horizontal_coefficient_width;
1534     int filter_pixel_margin = stbir_info->horizontal_filter_pixel_margin;
1535     int max_x = input_w + filter_pixel_margin * 2;
1536
1537     STBIR_ASSERT(!stbir__use_width_upsampling(stbir_info));
1538
1539     switch (channels) {
1540         case 1:
1541             for (x = 0; x < max_x; x++)
1542             {
1543                 int n0 = horizontal_contributors[x].n0;
1544                 int n1 = horizontal_contributors[x].n1;
1545
1546                 int in_x = x - filter_pixel_margin;
1547                 int in_pixel_index = in_x * 1;
1548                 int max_n = n1;
1549                 int coefficient_group = coefficient_width * x;
1550
1551                 for (k = n0; k <= max_n; k++)
1552                 {
1553                     int out_pixel_index = k * 1;
1554                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1555                     STBIR_ASSERT(coefficient != 0);
1556                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1557                 }
1558             }
1559             break;
1560
1561         case 2:
1562             for (x = 0; x < max_x; x++)
1563             {
1564                 int n0 = horizontal_contributors[x].n0;
1565                 int n1 = horizontal_contributors[x].n1;
1566
1567                 int in_x = x - filter_pixel_margin;
1568                 int in_pixel_index = in_x * 2;
1569                 int max_n = n1;
1570                 int coefficient_group = coefficient_width * x;
1571
1572                 for (k = n0; k <= max_n; k++)
1573                 {
1574                     int out_pixel_index = k * 2;
1575                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1576                     STBIR_ASSERT(coefficient != 0);
1577                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1578                     output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1579                 }
1580             }
1581             break;
1582
1583         case 3:
1584             for (x = 0; x < max_x; x++)
1585             {
1586                 int n0 = horizontal_contributors[x].n0;
1587                 int n1 = horizontal_contributors[x].n1;
1588
1589                 int in_x = x - filter_pixel_margin;
1590                 int in_pixel_index = in_x * 3;
1591                 int max_n = n1;
1592                 int coefficient_group = coefficient_width * x;
1593
1594                 for (k = n0; k <= max_n; k++)
1595                 {
1596                     int out_pixel_index = k * 3;
1597                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1598                     STBIR_ASSERT(coefficient != 0);
1599                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1600                     output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1601                     output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
1602                 }
1603             }
1604             break;
1605
1606         case 4:
1607             for (x = 0; x < max_x; x++)
1608             {
1609                 int n0 = horizontal_contributors[x].n0;
1610                 int n1 = horizontal_contributors[x].n1;
1611
1612                 int in_x = x - filter_pixel_margin;
1613                 int in_pixel_index = in_x * 4;
1614                 int max_n = n1;
1615                 int coefficient_group = coefficient_width * x;
1616
1617                 for (k = n0; k <= max_n; k++)
1618                 {
1619                     int out_pixel_index = k * 4;
1620                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1621                     STBIR_ASSERT(coefficient != 0);
1622                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
1623                     output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
1624                     output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
1625                     output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient;
1626                 }
1627             }
1628             break;
1629
1630         default:
1631             for (x = 0; x < max_x; x++)
1632             {
1633                 int n0 = horizontal_contributors[x].n0;
1634                 int n1 = horizontal_contributors[x].n1;
1635
1636                 int in_x = x - filter_pixel_margin;
1637                 int in_pixel_index = in_x * channels;
1638                 int max_n = n1;
1639                 int coefficient_group = coefficient_width * x;
1640
1641                 for (k = n0; k <= max_n; k++)
1642                 {
1643                     int c;
1644                     int out_pixel_index = k * channels;
1645                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
1646                     STBIR_ASSERT(coefficient != 0);
1647                     for (c = 0; c < channels; c++)
1648                         output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
1649                 }
1650             }
1651             break;
1652     }
1653 }
1654
1655 static void stbir__decode_and_resample_upsample(stbir__info* stbir_info, int n)
1656 {
1657     // Decode the nth scanline from the source image into the decode buffer.
1658     stbir__decode_scanline(stbir_info, n);
1659
1660     // Now resample it into the ring buffer.
1661     if (stbir__use_width_upsampling(stbir_info))
1662         stbir__resample_horizontal_upsample(stbir_info, stbir__add_empty_ring_buffer_entry(stbir_info, n));
1663     else
1664         stbir__resample_horizontal_downsample(stbir_info, stbir__add_empty_ring_buffer_entry(stbir_info, n));
1665
1666     // Now it's sitting in the ring buffer ready to be used as source for the vertical sampling.
1667 }
1668
1669 static void stbir__decode_and_resample_downsample(stbir__info* stbir_info, int n)
1670 {
1671     // Decode the nth scanline from the source image into the decode buffer.
1672     stbir__decode_scanline(stbir_info, n);
1673
1674     memset(stbir_info->horizontal_buffer, 0, stbir_info->output_w * stbir_info->channels * sizeof(float));
1675
1676     // Now resample it into the horizontal buffer.
1677     if (stbir__use_width_upsampling(stbir_info))
1678         stbir__resample_horizontal_upsample(stbir_info, stbir_info->horizontal_buffer);
1679     else
1680         stbir__resample_horizontal_downsample(stbir_info, stbir_info->horizontal_buffer);
1681
1682     // Now it's sitting in the horizontal buffer ready to be distributed into the ring buffers.
1683 }
1684
// Looks up the ring buffer entry holding scanline `get_scanline`.
// `begin_index` is the entry that stores `first_scanline`; later scanlines
// follow in order and wrap around after `ring_buffer_num_entries` entries.
// `ring_buffer_length` is the size of one entry in floats.
static float* stbir__get_ring_buffer_scanline(int get_scanline, float* ring_buffer, int begin_index, int first_scanline, int ring_buffer_num_entries, int ring_buffer_length)
{
    int scanlines_from_first = get_scanline - first_scanline;
    int slot = (begin_index + scanlines_from_first) % ring_buffer_num_entries;

    return stbir__get_ring_buffer_entry(ring_buffer, slot, ring_buffer_length);
}
1691
1692
1693 static void stbir__encode_scanline(stbir__info* stbir_info, int num_pixels, void *output_buffer, float *encode_buffer, int channels, int alpha_channel, int decode)
1694 {
1695     int x;
1696     int n;
1697     int num_nonalpha;
1698     stbir_uint16 nonalpha[STBIR_MAX_CHANNELS];
1699
1700     if (!(stbir_info->flags&STBIR_FLAG_ALPHA_PREMULTIPLIED))
1701     {
1702         for (x=0; x < num_pixels; ++x)
1703         {
1704             int pixel_index = x*channels;
1705
1706             float alpha = encode_buffer[pixel_index + alpha_channel];
1707             float reciprocal_alpha = alpha ? 1.0f / alpha : 0;
1708
1709             // unrolling this produced a 1% slowdown upscaling a large RGBA linear-space image on my machine - stb
1710             for (n = 0; n < channels; n++)
1711                 if (n != alpha_channel)
1712                     encode_buffer[pixel_index + n] *= reciprocal_alpha;
1713
1714             // We added in a small epsilon to prevent the color channel from being deleted with zero alpha.
1715             // Because we only add it for integer types, it will automatically be discarded on integer
1716             // conversion, so we don't need to subtract it back out (which would be problematic for
1717             // numeric precision reasons).
1718         }
1719     }
1720
1721     // build a table of all channels that need colorspace correction, so
1722     // we don't perform colorspace correction on channels that don't need it.
1723     for (x = 0, num_nonalpha = 0; x < channels; ++x)
1724     {
1725         if (x != alpha_channel || (stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE))
1726         {
1727             nonalpha[num_nonalpha++] = (stbir_uint16)x;
1728         }
1729     }
1730
1731     #define STBIR__ROUND_INT(f)    ((int)          ((f)+0.5))
1732     #define STBIR__ROUND_UINT(f)   ((stbir_uint32) ((f)+0.5))
1733
1734     #ifdef STBIR__SATURATE_INT
1735     #define STBIR__ENCODE_LINEAR8(f)   stbir__saturate8 (STBIR__ROUND_INT((f) * stbir__max_uint8_as_float ))
1736     #define STBIR__ENCODE_LINEAR16(f)  stbir__saturate16(STBIR__ROUND_INT((f) * stbir__max_uint16_as_float))
1737     #else
1738     #define STBIR__ENCODE_LINEAR8(f)   (unsigned char ) STBIR__ROUND_INT(stbir__saturate(f) * stbir__max_uint8_as_float )
1739     #define STBIR__ENCODE_LINEAR16(f)  (unsigned short) STBIR__ROUND_INT(stbir__saturate(f) * stbir__max_uint16_as_float)
1740     #endif
1741
1742     switch (decode)
1743     {
1744         case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
1745             for (x=0; x < num_pixels; ++x)
1746             {
1747                 int pixel_index = x*channels;
1748
1749                 for (n = 0; n < channels; n++)
1750                 {
1751                     int index = pixel_index + n;
1752                     ((unsigned char*)output_buffer)[index] = STBIR__ENCODE_LINEAR8(encode_buffer[index]);
1753                 }
1754             }
1755             break;
1756
1757         case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
1758             for (x=0; x < num_pixels; ++x)
1759             {
1760                 int pixel_index = x*channels;
1761
1762                 for (n = 0; n < num_nonalpha; n++)
1763                 {
1764                     int index = pixel_index + nonalpha[n];
1765                     ((unsigned char*)output_buffer)[index] = stbir__linear_to_srgb_uchar(encode_buffer[index]);
1766                 }
1767
1768                 if (!(stbir_info->flags & STBIR_FLAG_ALPHA_USES_COLORSPACE))
1769                     ((unsigned char *)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR8(encode_buffer[pixel_index+alpha_channel]);
1770             }
1771             break;
1772
1773         case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
1774             for (x=0; x < num_pixels; ++x)
1775             {
1776                 int pixel_index = x*channels;
1777
1778                 for (n = 0; n < channels; n++)
1779                 {
1780                     int index = pixel_index + n;
1781                     ((unsigned short*)output_buffer)[index] = STBIR__ENCODE_LINEAR16(encode_buffer[index]);
1782                 }
1783             }
1784             break;
1785
1786         case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
1787             for (x=0; x < num_pixels; ++x)
1788             {
1789                 int pixel_index = x*channels;
1790
1791                 for (n = 0; n < num_nonalpha; n++)
1792                 {
1793                     int index = pixel_index + nonalpha[n];
1794                     ((unsigned short*)output_buffer)[index] = (unsigned short)STBIR__ROUND_INT(stbir__linear_to_srgb(stbir__saturate(encode_buffer[index])) * stbir__max_uint16_as_float);
1795                 }
1796
1797                 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1798                     ((unsigned short*)output_buffer)[pixel_index + alpha_channel] = STBIR__ENCODE_LINEAR16(encode_buffer[pixel_index + alpha_channel]);
1799             }
1800
1801             break;
1802
1803         case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
1804             for (x=0; x < num_pixels; ++x)
1805             {
1806                 int pixel_index = x*channels;
1807
1808                 for (n = 0; n < channels; n++)
1809                 {
1810                     int index = pixel_index + n;
1811                     ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__saturate(encode_buffer[index])) * stbir__max_uint32_as_float);
1812                 }
1813             }
1814             break;
1815
1816         case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
1817             for (x=0; x < num_pixels; ++x)
1818             {
1819                 int pixel_index = x*channels;
1820
1821                 for (n = 0; n < num_nonalpha; n++)
1822                 {
1823                     int index = pixel_index + nonalpha[n];
1824                     ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_UINT(((double)stbir__linear_to_srgb(stbir__saturate(encode_buffer[index]))) * stbir__max_uint32_as_float);
1825                 }
1826
1827                 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1828                     ((unsigned int*)output_buffer)[pixel_index + alpha_channel] = (unsigned int)STBIR__ROUND_INT(((double)stbir__saturate(encode_buffer[pixel_index + alpha_channel])) * stbir__max_uint32_as_float);
1829             }
1830             break;
1831
1832         case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
1833             for (x=0; x < num_pixels; ++x)
1834             {
1835                 int pixel_index = x*channels;
1836
1837                 for (n = 0; n < channels; n++)
1838                 {
1839                     int index = pixel_index + n;
1840                     ((float*)output_buffer)[index] = encode_buffer[index];
1841                 }
1842             }
1843             break;
1844
1845         case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
1846             for (x=0; x < num_pixels; ++x)
1847             {
1848                 int pixel_index = x*channels;
1849
1850                 for (n = 0; n < num_nonalpha; n++)
1851                 {
1852                     int index = pixel_index + nonalpha[n];
1853                     ((float*)output_buffer)[index] = stbir__linear_to_srgb(encode_buffer[index]);
1854                 }
1855
1856                 if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
1857                     ((float*)output_buffer)[pixel_index + alpha_channel] = encode_buffer[pixel_index + alpha_channel];
1858             }
1859             break;
1860
1861         default:
1862             STBIR_ASSERT(!"Unknown type/colorspace/channels combination.");
1863             break;
1864     }
1865 }
1866
1867 static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n)
1868 {
1869     int x, k;
1870     int output_w = stbir_info->output_w;
1871     stbir__contributors* vertical_contributors = stbir_info->vertical_contributors;
1872     float* vertical_coefficients = stbir_info->vertical_coefficients;
1873     int channels = stbir_info->channels;
1874     int alpha_channel = stbir_info->alpha_channel;
1875     int type = stbir_info->type;
1876     int colorspace = stbir_info->colorspace;
1877     int ring_buffer_entries = stbir_info->ring_buffer_num_entries;
1878     void* output_data = stbir_info->output_data;
1879     float* encode_buffer = stbir_info->encode_buffer;
1880     int decode = STBIR__DECODE(type, colorspace);
1881     int coefficient_width = stbir_info->vertical_coefficient_width;
1882     int coefficient_counter;
1883     int contributor = n;
1884
1885     float* ring_buffer = stbir_info->ring_buffer;
1886     int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index;
1887     int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline;
1888     int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
1889
1890     int n0,n1, output_row_start;
1891     int coefficient_group = coefficient_width * contributor;
1892
1893     n0 = vertical_contributors[contributor].n0;
1894     n1 = vertical_contributors[contributor].n1;
1895
1896     output_row_start = n * stbir_info->output_stride_bytes;
1897
1898     STBIR_ASSERT(stbir__use_height_upsampling(stbir_info));
1899
1900     memset(encode_buffer, 0, output_w * sizeof(float) * channels);
1901
1902     // I tried reblocking this for better cache usage of encode_buffer
1903     // (using x_outer, k, x_inner), but it lost speed. -- stb
1904
1905     coefficient_counter = 0;
1906     switch (channels) {
1907         case 1:
1908             for (k = n0; k <= n1; k++)
1909             {
1910                 int coefficient_index = coefficient_counter++;
1911                 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1912                 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1913                 for (x = 0; x < output_w; ++x)
1914                 {
1915                     int in_pixel_index = x * 1;
1916                     encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
1917                 }
1918             }
1919             break;
1920         case 2:
1921             for (k = n0; k <= n1; k++)
1922             {
1923                 int coefficient_index = coefficient_counter++;
1924                 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1925                 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1926                 for (x = 0; x < output_w; ++x)
1927                 {
1928                     int in_pixel_index = x * 2;
1929                     encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
1930                     encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
1931                 }
1932             }
1933             break;
1934         case 3:
1935             for (k = n0; k <= n1; k++)
1936             {
1937                 int coefficient_index = coefficient_counter++;
1938                 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1939                 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1940                 for (x = 0; x < output_w; ++x)
1941                 {
1942                     int in_pixel_index = x * 3;
1943                     encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
1944                     encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
1945                     encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
1946                 }
1947             }
1948             break;
1949         case 4:
1950             for (k = n0; k <= n1; k++)
1951             {
1952                 int coefficient_index = coefficient_counter++;
1953                 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1954                 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1955                 for (x = 0; x < output_w; ++x)
1956                 {
1957                     int in_pixel_index = x * 4;
1958                     encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
1959                     encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
1960                     encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
1961                     encode_buffer[in_pixel_index + 3] += ring_buffer_entry[in_pixel_index + 3] * coefficient;
1962                 }
1963             }
1964             break;
1965         default:
1966             for (k = n0; k <= n1; k++)
1967             {
1968                 int coefficient_index = coefficient_counter++;
1969                 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
1970                 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
1971                 for (x = 0; x < output_w; ++x)
1972                 {
1973                     int in_pixel_index = x * channels;
1974                     int c;
1975                     for (c = 0; c < channels; c++)
1976                         encode_buffer[in_pixel_index + c] += ring_buffer_entry[in_pixel_index + c] * coefficient;
1977                 }
1978             }
1979             break;
1980     }
1981     stbir__encode_scanline(stbir_info, output_w, (char *) output_data + output_row_start, encode_buffer, channels, alpha_channel, decode);
1982 }
1983
1984 static void stbir__resample_vertical_downsample(stbir__info* stbir_info, int n)
1985 {
1986     int x, k;
1987     int output_w = stbir_info->output_w;
1988     stbir__contributors* vertical_contributors = stbir_info->vertical_contributors;
1989     float* vertical_coefficients = stbir_info->vertical_coefficients;
1990     int channels = stbir_info->channels;
1991     int ring_buffer_entries = stbir_info->ring_buffer_num_entries;
1992     float* horizontal_buffer = stbir_info->horizontal_buffer;
1993     int coefficient_width = stbir_info->vertical_coefficient_width;
1994     int contributor = n + stbir_info->vertical_filter_pixel_margin;
1995
1996     float* ring_buffer = stbir_info->ring_buffer;
1997     int ring_buffer_begin_index = stbir_info->ring_buffer_begin_index;
1998     int ring_buffer_first_scanline = stbir_info->ring_buffer_first_scanline;
1999     int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
2000     int n0,n1;
2001
2002     n0 = vertical_contributors[contributor].n0;
2003     n1 = vertical_contributors[contributor].n1;
2004
2005     STBIR_ASSERT(!stbir__use_height_upsampling(stbir_info));
2006
2007     for (k = n0; k <= n1; k++)
2008     {
2009         int coefficient_index = k - n0;
2010         int coefficient_group = coefficient_width * contributor;
2011         float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
2012
2013         float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, ring_buffer_entries, ring_buffer_length);
2014
2015         switch (channels) {
2016             case 1:
2017                 for (x = 0; x < output_w; x++)
2018                 {
2019                     int in_pixel_index = x * 1;
2020                     ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
2021                 }
2022                 break;
2023             case 2:
2024                 for (x = 0; x < output_w; x++)
2025                 {
2026                     int in_pixel_index = x * 2;
2027                     ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
2028                     ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
2029                 }
2030                 break;
2031             case 3:
2032                 for (x = 0; x < output_w; x++)
2033                 {
2034                     int in_pixel_index = x * 3;
2035                     ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
2036                     ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
2037                     ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
2038                 }
2039                 break;
2040             case 4:
2041                 for (x = 0; x < output_w; x++)
2042                 {
2043                     int in_pixel_index = x * 4;
2044                     ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
2045                     ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
2046                     ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
2047                     ring_buffer_entry[in_pixel_index + 3] += horizontal_buffer[in_pixel_index + 3] * coefficient;
2048                 }
2049                 break;
2050             default:
2051                 for (x = 0; x < output_w; x++)
2052                 {
2053                     int in_pixel_index = x * channels;
2054
2055                     int c;
2056                     for (c = 0; c < channels; c++)
2057                         ring_buffer_entry[in_pixel_index + c] += horizontal_buffer[in_pixel_index + c] * coefficient;
2058                 }
2059                 break;
2060         }
2061     }
2062 }
2063
2064 static void stbir__buffer_loop_upsample(stbir__info* stbir_info)
2065 {
2066     int y;
2067     float scale_ratio = stbir_info->vertical_scale;
2068     float out_scanlines_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(1/scale_ratio) * scale_ratio;
2069
2070     STBIR_ASSERT(stbir__use_height_upsampling(stbir_info));
2071
2072     for (y = 0; y < stbir_info->output_h; y++)
2073     {
2074         float in_center_of_out = 0; // Center of the current out scanline in the in scanline space
2075         int in_first_scanline = 0, in_last_scanline = 0;
2076
2077         stbir__calculate_sample_range_upsample(y, out_scanlines_radius, scale_ratio, stbir_info->vertical_shift, &in_first_scanline, &in_last_scanline, &in_center_of_out);
2078
2079         STBIR_ASSERT(in_last_scanline - in_first_scanline + 1 <= stbir_info->ring_buffer_num_entries);
2080
2081         if (stbir_info->ring_buffer_begin_index >= 0)
2082         {
2083             // Get rid of whatever we don't need anymore.
2084             while (in_first_scanline > stbir_info->ring_buffer_first_scanline)
2085             {
2086                 if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline)
2087                 {
2088                     // We just popped the last scanline off the ring buffer.
2089                     // Reset it to the empty state.
2090                     stbir_info->ring_buffer_begin_index = -1;
2091                     stbir_info->ring_buffer_first_scanline = 0;
2092                     stbir_info->ring_buffer_last_scanline = 0;
2093                     break;
2094                 }
2095                 else
2096                 {
2097                     stbir_info->ring_buffer_first_scanline++;
2098                     stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->ring_buffer_num_entries;
2099                 }
2100             }
2101         }
2102
2103         // Load in new ones.
2104         if (stbir_info->ring_buffer_begin_index < 0)
2105             stbir__decode_and_resample_upsample(stbir_info, in_first_scanline);
2106
2107         while (in_last_scanline > stbir_info->ring_buffer_last_scanline)
2108             stbir__decode_and_resample_upsample(stbir_info, stbir_info->ring_buffer_last_scanline + 1);
2109
2110         // Now all buffers should be ready to write a row of vertical sampling.
2111         stbir__resample_vertical_upsample(stbir_info, y);
2112
2113         STBIR_PROGRESS_REPORT((float)y / stbir_info->output_h);
2114     }
2115 }
2116
2117 static void stbir__empty_ring_buffer(stbir__info* stbir_info, int first_necessary_scanline)
2118 {
2119     int output_stride_bytes = stbir_info->output_stride_bytes;
2120     int channels = stbir_info->channels;
2121     int alpha_channel = stbir_info->alpha_channel;
2122     int type = stbir_info->type;
2123     int colorspace = stbir_info->colorspace;
2124     int output_w = stbir_info->output_w;
2125     void* output_data = stbir_info->output_data;
2126     int decode = STBIR__DECODE(type, colorspace);
2127
2128     float* ring_buffer = stbir_info->ring_buffer;
2129     int ring_buffer_length = stbir_info->ring_buffer_length_bytes/sizeof(float);
2130
2131     if (stbir_info->ring_buffer_begin_index >= 0)
2132     {
2133         // Get rid of whatever we don't need anymore.
2134         while (first_necessary_scanline > stbir_info->ring_buffer_first_scanline)
2135         {
2136             if (stbir_info->ring_buffer_first_scanline >= 0 && stbir_info->ring_buffer_first_scanline < stbir_info->output_h)
2137             {
2138                 int output_row_start = stbir_info->ring_buffer_first_scanline * output_stride_bytes;
2139                 float* ring_buffer_entry = stbir__get_ring_buffer_entry(ring_buffer, stbir_info->ring_buffer_begin_index, ring_buffer_length);
2140                 stbir__encode_scanline(stbir_info, output_w, (char *) output_data + output_row_start, ring_buffer_entry, channels, alpha_channel, decode);
2141                 STBIR_PROGRESS_REPORT((float)stbir_info->ring_buffer_first_scanline / stbir_info->output_h);
2142             }
2143
2144             if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline)
2145             {
2146                 // We just popped the last scanline off the ring buffer.
2147                 // Reset it to the empty state.
2148                 stbir_info->ring_buffer_begin_index = -1;
2149                 stbir_info->ring_buffer_first_scanline = 0;
2150                 stbir_info->ring_buffer_last_scanline = 0;
2151                 break;
2152             }
2153             else
2154             {
2155                 stbir_info->ring_buffer_first_scanline++;
2156                 stbir_info->ring_buffer_begin_index = (stbir_info->ring_buffer_begin_index + 1) % stbir_info->ring_buffer_num_entries;
2157             }
2158         }
2159     }
2160 }
2161
2162 static void stbir__buffer_loop_downsample(stbir__info* stbir_info)
2163 {
2164     int y;
2165     float scale_ratio = stbir_info->vertical_scale;
2166     int output_h = stbir_info->output_h;
2167     float in_pixels_radius = stbir__filter_info_table[stbir_info->vertical_filter].support(scale_ratio) / scale_ratio;
2168     int pixel_margin = stbir_info->vertical_filter_pixel_margin;
2169     int max_y = stbir_info->input_h + pixel_margin;
2170
2171     STBIR_ASSERT(!stbir__use_height_upsampling(stbir_info));
2172
2173     for (y = -pixel_margin; y < max_y; y++)
2174     {
2175         float out_center_of_in; // Center of the current out scanline in the in scanline space
2176         int out_first_scanline, out_last_scanline;
2177
2178         stbir__calculate_sample_range_downsample(y, in_pixels_radius, scale_ratio, stbir_info->vertical_shift, &out_first_scanline, &out_last_scanline, &out_center_of_in);
2179
2180         STBIR_ASSERT(out_last_scanline - out_first_scanline + 1 <= stbir_info->ring_buffer_num_entries);
2181
2182         if (out_last_scanline < 0 || out_first_scanline >= output_h)
2183             continue;
2184
2185         stbir__empty_ring_buffer(stbir_info, out_first_scanline);
2186
2187         stbir__decode_and_resample_downsample(stbir_info, y);
2188
2189         // Load in new ones.
2190         if (stbir_info->ring_buffer_begin_index < 0)
2191             stbir__add_empty_ring_buffer_entry(stbir_info, out_first_scanline);
2192
2193         while (out_last_scanline > stbir_info->ring_buffer_last_scanline)
2194             stbir__add_empty_ring_buffer_entry(stbir_info, stbir_info->ring_buffer_last_scanline + 1);
2195
2196         // Now the horizontal buffer is ready to write to all ring buffer rows.
2197         stbir__resample_vertical_downsample(stbir_info, y);
2198     }
2199
2200     stbir__empty_ring_buffer(stbir_info, stbir_info->output_h);
2201 }
2202
2203 static void stbir__setup(stbir__info *info, int input_w, int input_h, int output_w, int output_h, int channels)
2204 {
2205     info->input_w = input_w;
2206     info->input_h = input_h;
2207     info->output_w = output_w;
2208     info->output_h = output_h;
2209     info->channels = channels;
2210 }
2211
2212 static void stbir__calculate_transform(stbir__info *info, float s0, float t0, float s1, float t1, float *transform)
2213 {
2214     info->s0 = s0;
2215     info->t0 = t0;
2216     info->s1 = s1;
2217     info->t1 = t1;
2218
2219     if (transform)
2220     {
2221         info->horizontal_scale = transform[0];
2222         info->vertical_scale   = transform[1];
2223         info->horizontal_shift = transform[2];
2224         info->vertical_shift   = transform[3];
2225     }
2226     else
2227     {
2228         info->horizontal_scale = ((float)info->output_w / info->input_w) / (s1 - s0);
2229         info->vertical_scale = ((float)info->output_h / info->input_h) / (t1 - t0);
2230
2231         info->horizontal_shift = s0 * info->output_w / (s1 - s0);
2232         info->vertical_shift = t0 * info->output_h / (t1 - t0);
2233     }
2234 }
2235
2236 static void stbir__choose_filter(stbir__info *info, stbir_filter h_filter, stbir_filter v_filter)
2237 {
2238     if (h_filter == 0)
2239         h_filter = stbir__use_upsampling(info->horizontal_scale) ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE;
2240     if (v_filter == 0)
2241         v_filter = stbir__use_upsampling(info->vertical_scale)   ? STBIR_DEFAULT_FILTER_UPSAMPLE : STBIR_DEFAULT_FILTER_DOWNSAMPLE;
2242     info->horizontal_filter = h_filter;
2243     info->vertical_filter = v_filter;
2244 }
2245
2246 static stbir_uint32 stbir__calculate_memory(stbir__info *info)
2247 {
2248     int pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale);
2249     int filter_height = stbir__get_filter_pixel_width(info->vertical_filter, info->vertical_scale);
2250
2251     info->horizontal_num_contributors = stbir__get_contributors(info->horizontal_scale, info->horizontal_filter, info->input_w, info->output_w);
2252     info->vertical_num_contributors   = stbir__get_contributors(info->vertical_scale  , info->vertical_filter  , info->input_h, info->output_h);
2253
2254     // One extra entry because floating point precision problems sometimes cause an extra to be necessary.
2255     info->ring_buffer_num_entries = filter_height + 1;
2256
2257     info->horizontal_contributors_size = info->horizontal_num_contributors * sizeof(stbir__contributors);
2258     info->horizontal_coefficients_size = stbir__get_total_horizontal_coefficients(info) * sizeof(float);
2259     info->vertical_contributors_size = info->vertical_num_contributors * sizeof(stbir__contributors);
2260     info->vertical_coefficients_size = stbir__get_total_vertical_coefficients(info) * sizeof(float);
2261     info->decode_buffer_size = (info->input_w + pixel_margin * 2) * info->channels * sizeof(float);
2262     info->horizontal_buffer_size = info->output_w * info->channels * sizeof(float);
2263     info->ring_buffer_size = info->output_w * info->channels * info->ring_buffer_num_entries * sizeof(float);
2264     info->encode_buffer_size = info->output_w * info->channels * sizeof(float);
2265
2266     STBIR_ASSERT(info->horizontal_filter != 0);
2267     STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late
2268     STBIR_ASSERT(info->vertical_filter != 0);
2269     STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late
2270
2271     if (stbir__use_height_upsampling(info))
2272         // The horizontal buffer is for when we're downsampling the height and we
2273         // can't output the result of sampling the decode buffer directly into the
2274         // ring buffers.
2275         info->horizontal_buffer_size = 0;
2276     else
2277         // The encode buffer is to retain precision in the height upsampling method
2278         // and isn't used when height downsampling.
2279         info->encode_buffer_size = 0;
2280
2281     return info->horizontal_contributors_size + info->horizontal_coefficients_size
2282         + info->vertical_contributors_size + info->vertical_coefficients_size
2283         + info->decode_buffer_size + info->horizontal_buffer_size
2284         + info->ring_buffer_size + info->encode_buffer_size;
2285 }
2286
2287 static int stbir__resize_allocated(stbir__info *info,
2288     const void* input_data, int input_stride_in_bytes,
2289     void* output_data, int output_stride_in_bytes,
2290     int alpha_channel, stbir_uint32 flags, stbir_datatype type,
2291     stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace,
2292     void* tempmem, size_t tempmem_size_in_bytes)
2293 {
2294     size_t memory_required = stbir__calculate_memory(info);
2295
2296     int width_stride_input = input_stride_in_bytes ? input_stride_in_bytes : info->channels * info->input_w * stbir__type_size[type];
2297     int width_stride_output = output_stride_in_bytes ? output_stride_in_bytes : info->channels * info->output_w * stbir__type_size[type];
2298
2299 #ifdef STBIR_DEBUG_OVERWRITE_TEST
2300 #define OVERWRITE_ARRAY_SIZE 8
2301     unsigned char overwrite_output_before_pre[OVERWRITE_ARRAY_SIZE];
2302     unsigned char overwrite_tempmem_before_pre[OVERWRITE_ARRAY_SIZE];
2303     unsigned char overwrite_output_after_pre[OVERWRITE_ARRAY_SIZE];
2304     unsigned char overwrite_tempmem_after_pre[OVERWRITE_ARRAY_SIZE];
2305
2306     size_t begin_forbidden = width_stride_output * (info->output_h - 1) + info->output_w * info->channels * stbir__type_size[type];
2307     memcpy(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE);
2308     memcpy(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE);
2309     memcpy(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE);
2310     memcpy(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE);
2311 #endif
2312
2313     STBIR_ASSERT(info->channels >= 0);
2314     STBIR_ASSERT(info->channels <= STBIR_MAX_CHANNELS);
2315
2316     if (info->channels < 0 || info->channels > STBIR_MAX_CHANNELS)
2317         return 0;
2318
2319     STBIR_ASSERT(info->horizontal_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
2320     STBIR_ASSERT(info->vertical_filter < STBIR__ARRAY_SIZE(stbir__filter_info_table));
2321
2322     if (info->horizontal_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
2323         return 0;
2324     if (info->vertical_filter >= STBIR__ARRAY_SIZE(stbir__filter_info_table))
2325         return 0;
2326
2327     if (alpha_channel < 0)
2328         flags |= STBIR_FLAG_ALPHA_USES_COLORSPACE | STBIR_FLAG_ALPHA_PREMULTIPLIED;
2329
2330     if (!(flags&STBIR_FLAG_ALPHA_USES_COLORSPACE) || !(flags&STBIR_FLAG_ALPHA_PREMULTIPLIED)) {
2331         STBIR_ASSERT(alpha_channel >= 0 && alpha_channel < info->channels);
2332     }
2333
2334     if (alpha_channel >= info->channels)
2335         return 0;
2336
2337     STBIR_ASSERT(tempmem);
2338
2339     if (!tempmem)
2340         return 0;
2341
2342     STBIR_ASSERT(tempmem_size_in_bytes >= memory_required);
2343
2344     if (tempmem_size_in_bytes < memory_required)
2345         return 0;
2346
2347     memset(tempmem, 0, tempmem_size_in_bytes);
2348
2349     info->input_data = input_data;
2350     info->input_stride_bytes = width_stride_input;
2351
2352     info->output_data = output_data;
2353     info->output_stride_bytes = width_stride_output;
2354
2355     info->alpha_channel = alpha_channel;
2356     info->flags = flags;
2357     info->type = type;
2358     info->edge_horizontal = edge_horizontal;
2359     info->edge_vertical = edge_vertical;
2360     info->colorspace = colorspace;
2361
2362     info->horizontal_coefficient_width   = stbir__get_coefficient_width  (info->horizontal_filter, info->horizontal_scale);
2363     info->vertical_coefficient_width     = stbir__get_coefficient_width  (info->vertical_filter  , info->vertical_scale  );
2364     info->horizontal_filter_pixel_width  = stbir__get_filter_pixel_width (info->horizontal_filter, info->horizontal_scale);
2365     info->vertical_filter_pixel_width    = stbir__get_filter_pixel_width (info->vertical_filter  , info->vertical_scale  );
2366     info->horizontal_filter_pixel_margin = stbir__get_filter_pixel_margin(info->horizontal_filter, info->horizontal_scale);
2367     info->vertical_filter_pixel_margin   = stbir__get_filter_pixel_margin(info->vertical_filter  , info->vertical_scale  );
2368
2369     info->ring_buffer_length_bytes = info->output_w * info->channels * sizeof(float);
2370     info->decode_buffer_pixels = info->input_w + info->horizontal_filter_pixel_margin * 2;
2371
2372 #define STBIR__NEXT_MEMPTR(current, newtype) (newtype*)(((unsigned char*)current) + current##_size)
2373
2374     info->horizontal_contributors = (stbir__contributors *) tempmem;
2375     info->horizontal_coefficients = STBIR__NEXT_MEMPTR(info->horizontal_contributors, float);
2376     info->vertical_contributors = STBIR__NEXT_MEMPTR(info->horizontal_coefficients, stbir__contributors);
2377     info->vertical_coefficients = STBIR__NEXT_MEMPTR(info->vertical_contributors, float);
2378     info->decode_buffer = STBIR__NEXT_MEMPTR(info->vertical_coefficients, float);
2379
2380     if (stbir__use_height_upsampling(info))
2381     {
2382         info->horizontal_buffer = NULL;
2383         info->ring_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float);
2384         info->encode_buffer = STBIR__NEXT_MEMPTR(info->ring_buffer, float);
2385
2386         STBIR_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->encode_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes);
2387     }
2388     else
2389     {
2390         info->horizontal_buffer = STBIR__NEXT_MEMPTR(info->decode_buffer, float);
2391         info->ring_buffer = STBIR__NEXT_MEMPTR(info->horizontal_buffer, float);
2392         info->encode_buffer = NULL;
2393
2394         STBIR_ASSERT((size_t)STBIR__NEXT_MEMPTR(info->ring_buffer, unsigned char) == (size_t)tempmem + tempmem_size_in_bytes);
2395     }
2396
2397 #undef STBIR__NEXT_MEMPTR
2398
2399     // This signals that the ring buffer is empty
2400     info->ring_buffer_begin_index = -1;
2401
2402     stbir__calculate_filters(info->horizontal_contributors, info->horizontal_coefficients, info->horizontal_filter, info->horizontal_scale, info->horizontal_shift, info->input_w, info->output_w);
2403     stbir__calculate_filters(info->vertical_contributors, info->vertical_coefficients, info->vertical_filter, info->vertical_scale, info->vertical_shift, info->input_h, info->output_h);
2404
2405     STBIR_PROGRESS_REPORT(0);
2406
2407     if (stbir__use_height_upsampling(info))
2408         stbir__buffer_loop_upsample(info);
2409     else
2410         stbir__buffer_loop_downsample(info);
2411
2412     STBIR_PROGRESS_REPORT(1);
2413
2414 #ifdef STBIR_DEBUG_OVERWRITE_TEST
2415     STBIR_ASSERT(memcmp(overwrite_output_before_pre, &((unsigned char*)output_data)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0);
2416     STBIR_ASSERT(memcmp(overwrite_output_after_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE) == 0);
2417     STBIR_ASSERT(memcmp(overwrite_tempmem_before_pre, &((unsigned char*)tempmem)[-OVERWRITE_ARRAY_SIZE], OVERWRITE_ARRAY_SIZE) == 0);
2418     STBIR_ASSERT(memcmp(overwrite_tempmem_after_pre, &((unsigned char*)tempmem)[tempmem_size_in_bytes], OVERWRITE_ARRAY_SIZE) == 0);
2419 #endif
2420
2421     return 1;
2422 }
2423
2424
2425 static int stbir__resize_arbitrary(
2426     void *alloc_context,
2427     const void* input_data, int input_w, int input_h, int input_stride_in_bytes,
2428     void* output_data, int output_w, int output_h, int output_stride_in_bytes,
2429     float s0, float t0, float s1, float t1, float *transform,
2430     int channels, int alpha_channel, stbir_uint32 flags, stbir_datatype type,
2431     stbir_filter h_filter, stbir_filter v_filter,
2432     stbir_edge edge_horizontal, stbir_edge edge_vertical, stbir_colorspace colorspace)
2433 {
2434     stbir__info info;
2435     int result;
2436     size_t memory_required;
2437     void* extra_memory;
2438
2439     stbir__setup(&info, input_w, input_h, output_w, output_h, channels);
2440     stbir__calculate_transform(&info, s0,t0,s1,t1,transform);
2441     stbir__choose_filter(&info, h_filter, v_filter);
2442     memory_required = stbir__calculate_memory(&info);
2443     extra_memory = STBIR_MALLOC(memory_required, alloc_context);
2444
2445     if (!extra_memory)
2446         return 0;
2447
2448     result = stbir__resize_allocated(&info, input_data, input_stride_in_bytes,
2449                                             output_data, output_stride_in_bytes,
2450                                             alpha_channel, flags, type,
2451                                             edge_horizontal, edge_vertical,
2452                                             colorspace, extra_memory, memory_required);
2453
2454     STBIR_FREE(extra_memory, alloc_context);
2455
2456     return result;
2457 }
2458
2459 STBIRDEF int stbir_resize_uint8(     const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2460                                            unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2461                                      int num_channels)
2462 {
2463     return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
2464         output_pixels, output_w, output_h, output_stride_in_bytes,
2465         0,0,1,1,NULL,num_channels,-1,0, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
2466         STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR);
2467 }
2468
2469 STBIRDEF int stbir_resize_float(     const float *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2470                                            float *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2471                                      int num_channels)
2472 {
2473     return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
2474         output_pixels, output_w, output_h, output_stride_in_bytes,
2475         0,0,1,1,NULL,num_channels,-1,0, STBIR_TYPE_FLOAT, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
2476         STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_LINEAR);
2477 }
2478
2479 STBIRDEF int stbir_resize_uint8_srgb(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2480                                            unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2481                                      int num_channels, int alpha_channel, int flags)
2482 {
2483     return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
2484         output_pixels, output_w, output_h, output_stride_in_bytes,
2485         0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
2486         STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_COLORSPACE_SRGB);
2487 }
2488
2489 STBIRDEF int stbir_resize_uint8_srgb_edgemode(const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2490                                                     unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2491                                               int num_channels, int alpha_channel, int flags,
2492                                               stbir_edge edge_wrap_mode)
2493 {
2494     return stbir__resize_arbitrary(NULL, input_pixels, input_w, input_h, input_stride_in_bytes,
2495         output_pixels, output_w, output_h, output_stride_in_bytes,
2496         0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT,
2497         edge_wrap_mode, edge_wrap_mode, STBIR_COLORSPACE_SRGB);
2498 }
2499
2500 STBIRDEF int stbir_resize_uint8_generic( const unsigned char *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2501                                                unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2502                                          int num_channels, int alpha_channel, int flags,
2503                                          stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
2504                                          void *alloc_context)
2505 {
2506     return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2507         output_pixels, output_w, output_h, output_stride_in_bytes,
2508         0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT8, filter, filter,
2509         edge_wrap_mode, edge_wrap_mode, space);
2510 }
2511
2512 STBIRDEF int stbir_resize_uint16_generic(const stbir_uint16 *input_pixels  , int input_w , int input_h , int input_stride_in_bytes,
2513                                                stbir_uint16 *output_pixels , int output_w, int output_h, int output_stride_in_bytes,
2514                                          int num_channels, int alpha_channel, int flags,
2515                                          stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
2516                                          void *alloc_context)
2517 {
2518     return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2519         output_pixels, output_w, output_h, output_stride_in_bytes,
2520         0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_UINT16, filter, filter,
2521         edge_wrap_mode, edge_wrap_mode, space);
2522 }
2523
2524
2525 STBIRDEF int stbir_resize_float_generic( const float *input_pixels         , int input_w , int input_h , int input_stride_in_bytes,
2526                                                float *output_pixels        , int output_w, int output_h, int output_stride_in_bytes,
2527                                          int num_channels, int alpha_channel, int flags,
2528                                          stbir_edge edge_wrap_mode, stbir_filter filter, stbir_colorspace space,
2529                                          void *alloc_context)
2530 {
2531     return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2532         output_pixels, output_w, output_h, output_stride_in_bytes,
2533         0,0,1,1,NULL,num_channels,alpha_channel,flags, STBIR_TYPE_FLOAT, filter, filter,
2534         edge_wrap_mode, edge_wrap_mode, space);
2535 }
2536
2537
2538 STBIRDEF int stbir_resize(         const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2539                                          void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2540                                    stbir_datatype datatype,
2541                                    int num_channels, int alpha_channel, int flags,
2542                                    stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
2543                                    stbir_filter filter_horizontal,  stbir_filter filter_vertical,
2544                                    stbir_colorspace space, void *alloc_context)
2545 {
2546     return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2547         output_pixels, output_w, output_h, output_stride_in_bytes,
2548         0,0,1,1,NULL,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
2549         edge_mode_horizontal, edge_mode_vertical, space);
2550 }
2551
2552
2553 STBIRDEF int stbir_resize_subpixel(const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2554                                          void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2555                                    stbir_datatype datatype,
2556                                    int num_channels, int alpha_channel, int flags,
2557                                    stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
2558                                    stbir_filter filter_horizontal,  stbir_filter filter_vertical,
2559                                    stbir_colorspace space, void *alloc_context,
2560                                    float x_scale, float y_scale,
2561                                    float x_offset, float y_offset)
2562 {
2563     float transform[4];
2564     transform[0] = x_scale;
2565     transform[1] = y_scale;
2566     transform[2] = x_offset;
2567     transform[3] = y_offset;
2568     return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2569         output_pixels, output_w, output_h, output_stride_in_bytes,
2570         0,0,1,1,transform,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
2571         edge_mode_horizontal, edge_mode_vertical, space);
2572 }
2573
2574 STBIRDEF int stbir_resize_region(  const void *input_pixels , int input_w , int input_h , int input_stride_in_bytes,
2575                                          void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
2576                                    stbir_datatype datatype,
2577                                    int num_channels, int alpha_channel, int flags,
2578                                    stbir_edge edge_mode_horizontal, stbir_edge edge_mode_vertical,
2579                                    stbir_filter filter_horizontal,  stbir_filter filter_vertical,
2580                                    stbir_colorspace space, void *alloc_context,
2581                                    float s0, float t0, float s1, float t1)
2582 {
2583     return stbir__resize_arbitrary(alloc_context, input_pixels, input_w, input_h, input_stride_in_bytes,
2584         output_pixels, output_w, output_h, output_stride_in_bytes,
2585         s0,t0,s1,t1,NULL,num_channels,alpha_channel,flags, datatype, filter_horizontal, filter_vertical,
2586         edge_mode_horizontal, edge_mode_vertical, space);
2587 }
2588
2589 #endif // STB_IMAGE_RESIZE_IMPLEMENTATION
2590
2591 /*
2592 ------------------------------------------------------------------------------
2593 This software is available under 2 licenses -- choose whichever you prefer.
2594 ------------------------------------------------------------------------------
2595 ALTERNATIVE A - MIT License
2596 Copyright (c) 2017 Sean Barrett
2597 Permission is hereby granted, free of charge, to any person obtaining a copy of
2598 this software and associated documentation files (the "Software"), to deal in
2599 the Software without restriction, including without limitation the rights to
2600 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
2601 of the Software, and to permit persons to whom the Software is furnished to do
2602 so, subject to the following conditions:
2603 The above copyright notice and this permission notice shall be included in all
2604 copies or substantial portions of the Software.
2605 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2606 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2607 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2608 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2609 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2610 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2611 SOFTWARE.
2612 ------------------------------------------------------------------------------
2613 ALTERNATIVE B - Public Domain (www.unlicense.org)
2614 This is free and unencumbered software released into the public domain.
2615 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
2616 software, either in source code form or as a compiled binary, for any purpose,
2617 commercial or non-commercial, and by any means.
2618 In jurisdictions that recognize copyright laws, the author or authors of this
2619 software dedicate any and all copyright interest in the software to the public
2620 domain. We make this dedication for the benefit of the public at large and to
2621 the detriment of our heirs and successors. We intend this dedication to be an
2622 overt act of relinquishment in perpetuity of all present and future rights to
2623 this software under copyright law.
2624 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2625 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2626 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2627 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
2628 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
2629 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2630 ------------------------------------------------------------------------------
2631 */