From: Oskar Arvidsson <oskar@irock.se>
Date: Mon, 27 Sep 2010 14:02:20 +0000 (+0200)
Subject: Finish support for high-depth video throughout x264
X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=0467589e35295c522bdae382e0e3b021deea9919;p=x264

Finish support for high-depth video throughout x264
Add support for high depth input in libx264.
Add support for 16-bit colorspaces in the filtering system.
Add support for input bit depths in the interval [9,16] with the raw demuxer.
Add a depth filter to dither input to x264.
---

diff --git a/Makefile b/Makefile
index bab55e54..0cd7b82d 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,7 @@ SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
          output/flv.c output/flv_bytestream.c filters/filters.c \
          filters/video/video.c filters/video/source.c filters/video/internal.c \
          filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \
-         filters/video/select_every.c filters/video/crop.c
+         filters/video/select_every.c filters/video/crop.c filters/video/depth.c
 
 SRCSO =
 
diff --git a/common/common.c b/common/common.c
index c329cb0e..aff5fc39 100644
--- a/common/common.c
+++ b/common/common.c
@@ -33,6 +33,8 @@
 #include <malloc.h>
 #endif
 
+const int x264_bit_depth = BIT_DEPTH;
+
 static void x264_log_default( void *, int, const char *, va_list );
 
 /****************************************************************************
@@ -1047,19 +1049,20 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
     x264_picture_init( pic );
     pic->img.i_csp = i_csp;
     pic->img.i_plane = csp == X264_CSP_NV12 ? 2 : 3;
-    pic->img.plane[0] = x264_malloc( 3 * i_width * i_height / 2 );
+    int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
+    pic->img.plane[0] = x264_malloc( 3 * i_width * i_height / 2 * depth_factor );
     if( !pic->img.plane[0] )
         return -1;
-    pic->img.plane[1] = pic->img.plane[0] + i_width * i_height;
+    pic->img.plane[1] = pic->img.plane[0] + i_width * i_height * depth_factor;
     if( csp != X264_CSP_NV12 )
-        pic->img.plane[2] = pic->img.plane[1] + i_width * i_height / 4;
-    pic->img.i_stride[0] = i_width;
+        pic->img.plane[2] = pic->img.plane[1] + i_width * i_height / 4 * depth_factor;
+    pic->img.i_stride[0] = i_width * depth_factor;
     if( csp == X264_CSP_NV12 )
-        pic->img.i_stride[1] = i_width;
+        pic->img.i_stride[1] = i_width * depth_factor;
     else
     {
-        pic->img.i_stride[1] = i_width / 2;
-        pic->img.i_stride[2] = i_width / 2;
+        pic->img.i_stride[1] = i_width / 2 * depth_factor;
+        pic->img.i_stride[2] = i_width / 2 * depth_factor;
     }
     return 0;
 }
diff --git a/common/frame.c b/common/frame.c
index 92d4c7c4..b24d9f92 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -263,6 +263,20 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
         return -1;
     }
 
+#if X264_HIGH_BIT_DEPTH
+    if( !(src->img.i_csp & X264_CSP_HIGH_DEPTH) )
+    {
+        x264_log( h, X264_LOG_ERROR, "This build of x264 requires high depth input. Rebuild to support 8-bit input.\n" );
+        return -1;
+    }
+#else
+    if( src->img.i_csp & X264_CSP_HIGH_DEPTH )
+    {
+        x264_log( h, X264_LOG_ERROR, "This build of x264 requires 8-bit input. Rebuild to support high depth input.\n" );
+        return -1;
+    }
+#endif
+
     dst->i_type     = src->i_type;
     dst->i_qpplus1  = src->i_qpplus1;
     dst->i_pts      = dst->i_reordered_pts = src->i_pts;
diff --git a/common/mc.c b/common/mc.c
index 5b58a764..acc23120 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -302,12 +302,7 @@ void x264_plane_copy_c( pixel *dst, int i_dst,
 {
     while( h-- )
     {
-#if X264_HIGH_BIT_DEPTH
-        for( int i = 0; i < w; i++ )
-            dst[i] = src[i] << (BIT_DEPTH-8);
-#else
-        memcpy( dst, src, w );
-#endif
+        memcpy( dst, src, w * sizeof(pixel) );
         dst += i_dst;
         src += i_src;
     }
@@ -320,8 +315,8 @@ void x264_plane_copy_interleave_c( pixel *dst, int i_dst,
     for( int y=0; y<h; y++, dst+=i_dst, srcu+=i_srcu, srcv+=i_srcv )
         for( int x=0; x<w; x++ )
         {
-            dst[2*x]   = srcu[x] << (BIT_DEPTH-8);
-            dst[2*x+1] = srcv[x] << (BIT_DEPTH-8);
+            dst[2*x]   = ((pixel*)srcu)[x];
+            dst[2*x+1] = ((pixel*)srcv)[x];
         }
 }
 
diff --git a/encoder/encoder.c b/encoder/encoder.c
index f83ed619..a07c9ed8 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -2777,12 +2777,14 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
         x264_log( h, X264_LOG_WARNING, "invalid DTS: PTS is less than DTS\n" );
 
     pic_out->img.i_csp = X264_CSP_NV12;
+#if X264_HIGH_BIT_DEPTH
+    pic_out->img.i_csp |= X264_CSP_HIGH_DEPTH;
+#endif
     pic_out->img.i_plane = h->fdec->i_plane;
     for( int i = 0; i < 2; i++ )
     {
-        pic_out->img.i_stride[i] = h->fdec->i_stride[i];
-        // FIXME This breaks the API when pixel != uint8_t.
-        pic_out->img.plane[i] = h->fdec->plane[i];
+        pic_out->img.i_stride[i] = h->fdec->i_stride[i] * sizeof(pixel);
+        pic_out->img.plane[i] = (uint8_t*)h->fdec->plane[i];
     }
 
     x264_frame_push_unused( thread_current, h->fenc );
diff --git a/filters/video/crop.c b/filters/video/crop.c
index 2a3c2b15..b70476ed 100644
--- a/filters/video/crop.c
+++ b/filters/video/crop.c
@@ -103,8 +103,12 @@ static int get_frame( hnd_t handle, cli_pic_t *output, int frame )
     output->img.height = h->dims[3];
     /* shift the plane pointers down 'top' rows and right 'left' columns. */
     for( int i = 0; i < output->img.planes; i++ )
-        output->img.plane[i] += (int)(output->img.stride[i] * h->dims[1] * h->csp->height[i]
-                                    + h->dims[0] * h->csp->width[i]);
+    {
+        intptr_t offset = output->img.stride[i] * h->dims[1] * h->csp->height[i];
+        offset += h->dims[0] * h->csp->width[i];
+        offset *= x264_cli_csp_depth_factor( output->img.csp );
+        output->img.plane[i] += offset;
+    }
     return 0;
 }
 
diff --git a/filters/video/depth.c b/filters/video/depth.c
new file mode 100644
index 00000000..92dbeb8c
--- /dev/null
+++ b/filters/video/depth.c
@@ -0,0 +1,227 @@
+/*****************************************************************************
+ * depth.c: x264 video depth filter
+ *****************************************************************************
+ * Copyright (C) 2010 Oskar Arvidsson <oskar@irock.se>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#include "video.h"
+#define NAME "depth"
+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
+
+cli_vid_filter_t depth_filter;
+
+typedef struct /* per-instance state of the depth filter */
+{
+    hnd_t prev_hnd; /* handle of the filter this one wraps (copied from *handle in init) */
+    cli_vid_filter_t prev_filter; /* callbacks of that wrapped filter */
+
+    int bit_depth; /* bit depth this filter converts to */
+    int dst_csp; /* colorspace, including the HIGH_DEPTH flag, that init writes back to info->csp */
+    cli_pic_t buffer; /* destination picture for dithered/scaled frames */
+    int16_t *error_buf; /* dithering error row; points just past this struct */
+} depth_hnd_t;
+
+static int depth_filter_csp_is_supported( int csp ) /* nonzero iff the masked csp is I420, I422, I444, YV12 or NV12 */
+{
+    int csp_mask = csp & X264_CSP_MASK; /* compare on the masked value only */
+    return csp_mask == X264_CSP_I420 ||
+           csp_mask == X264_CSP_I422 ||
+           csp_mask == X264_CSP_I444 ||
+           csp_mask == X264_CSP_YV12 ||
+           csp_mask == X264_CSP_NV12;
+}
+
+static int csp_num_interleaved( int csp, int plane ) /* number of components interleaved in the given plane */
+{
+    int csp_mask = csp & X264_CSP_MASK;
+    return ( csp_mask == X264_CSP_NV12 && plane == 1 ) ? 2 : 1; /* only plane 1 of NV12 yields 2 */
+}
+
+/* The dithering algorithm is based on Sierra-2-4A error diffusion. It has been
+ * written in such a way so that if the source has been upconverted using the
+ * same algorithm as used in scale_image, dithering down to the source bit
+ * depth again is lossless. */
+#define DITHER_PLANE( pitch ) /* pitch = distance in samples between consecutive samples of one component */ \
+static void dither_plane_##pitch( pixel *dst, int dst_stride, uint16_t *src, int src_stride, /* strides count elements, not bytes */ \
+                                        int width, int height, int16_t *errors ) \
+{ \
+    const int lshift = 16-BIT_DEPTH; \
+    const int rshift = 2*BIT_DEPTH-16; \
+    const int pixel_max = (1 << BIT_DEPTH)-1; \
+    const int half = 1 << (16-BIT_DEPTH); \
+    memset( errors, 0, (width+1) * sizeof(int16_t) ); /* no error is carried into the first row */ \
+    for( int y = 0; y < height; y++, src += src_stride, dst += dst_stride ) \
+    { \
+        int err = 0; /* no error is carried into the first column */ \
+        for( int x = 0; x < width; x++ ) \
+        { \
+            err = err*2 + errors[x] + errors[x+1]; /* 2x left neighbour + above + above-right (errors[] still holds the previous row) */ \
+            dst[x*pitch] = x264_clip3( (((src[x*pitch]+half)<<2)+err)*pixel_max >> 18, 0, pixel_max ); \
+            errors[x] = err = src[x*pitch] - (dst[x*pitch] << lshift) - (dst[x*pitch] >> rshift); /* source minus dst expanded back to 16 bits */ \
+        } \
+    } \
+}
+
+DITHER_PLANE( 1 )
+DITHER_PLANE( 2 )
+
+static void dither_image( cli_image_t *out, cli_image_t *img, int16_t *error_buf ) /* dither each plane of img (read as uint16_t) into out (written as pixel) */
+{
+    int csp_mask = img->csp & X264_CSP_MASK;
+    for( int i = 0; i < img->planes; i++ )
+    {
+        int num_interleaved = csp_num_interleaved( img->csp, i );
+        int height = x264_cli_csps[csp_mask].height[i] * img->height;
+        int width = x264_cli_csps[csp_mask].width[i] * img->width / num_interleaved; /* samples per row of ONE component */
+
+#define CALL_DITHER_PLANE( pitch, off ) \
+        dither_plane_##pitch( ((pixel*)out->plane[i])+off, out->stride[i]/sizeof(pixel), \
+                ((uint16_t*)img->plane[i])+off, img->stride[i]/2, width, height, error_buf ) /* strides divided by the element size */
+
+        if( num_interleaved == 1 )
+        {
+            CALL_DITHER_PLANE( 1, 0 );
+        }
+        else
+        {
+            CALL_DITHER_PLANE( 2, 0 ); /* first interleaved component */
+            CALL_DITHER_PLANE( 2, 1 ); /* second interleaved component */
+        }
+    }
+}
+
+static void scale_image( cli_image_t *output, cli_image_t *img ) /* upconvert each 8-bit plane of img into 16-bit storage in output */
+{
+    /* this function mimics how swscale does upconversion. 8-bit is converted
+     * to 16-bit through left shifting the original value by 8 and then adding
+     * the original value to that. This effectively keeps the full color range
+     * while also being fast. for n-bit we basically do the same thing, but we
+     * discard the lower 16-n bits. */
+    int csp_mask = img->csp & X264_CSP_MASK;
+    const int shift = 16-BIT_DEPTH;
+    for( int i = 0; i < img->planes; i++ )
+    {
+        uint8_t *src = img->plane[i];
+        uint16_t *dst = (uint16_t*)output->plane[i];
+        int height = x264_cli_csps[csp_mask].height[i] * img->height;
+        int width = x264_cli_csps[csp_mask].width[i] * img->width;
+
+        for( int j = 0; j < height; j++ )
+        {
+            for( int k = 0; k < width; k++ )
+                dst[k] = ((src[k] << 8) + src[k]) >> shift;
+
+            src += img->stride[i];
+            dst += output->stride[i]/2; /* stride halved because dst advances in 16-bit samples */
+        }
+    }
+}
+
+static int get_frame( hnd_t handle, cli_pic_t *output, int frame ) /* fetch a frame from the wrapped filter and convert its depth if required */
+{
+    depth_hnd_t *h = handle;
+
+    if( h->prev_filter.get_frame( h->prev_hnd, output, frame ) )
+        return -1;
+
+    if( h->bit_depth < 16 && output->img.csp & X264_CSP_HIGH_DEPTH ) /* high-depth frame, target below 16 bits: dither down */
+    {
+        dither_image( &h->buffer.img, &output->img, h->error_buf );
+        output->img = h->buffer.img; /* hand out the filter's own buffer instead */
+    }
+    else if( h->bit_depth > 8 && !(output->img.csp & X264_CSP_HIGH_DEPTH) ) /* 8-bit frame, target above 8 bits: scale up */
+    {
+        scale_image( &h->buffer.img, &output->img );
+        output->img = h->buffer.img; /* hand out the filter's own buffer instead */
+    }
+    return 0;
+}
+
+static int release_frame( hnd_t handle, cli_pic_t *pic, int frame ) /* forwards the release to the wrapped filter and returns its result */
+{
+    depth_hnd_t *h = handle;
+    return h->prev_filter.release_frame( h->prev_hnd, pic, frame );
+}
+
+static void free_filter( hnd_t handle ) /* frees the wrapped filter, the conversion buffer and this handle */
+{
+    depth_hnd_t *h = handle;
+    h->prev_filter.free( h->prev_hnd ); /* release the wrapped filter first */
+    x264_cli_pic_clean( &h->buffer );
+    x264_free( h ); /* error_buf lives inside this same allocation */
+}
+
+static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, /* returns 0 on success or when no conversion is needed */
+                 x264_param_t *param, char *opt_string )
+{
+    int ret = 0;
+    int change_fmt = (info->csp ^ param->i_csp) & X264_CSP_HIGH_DEPTH; /* nonzero if input and encoder csp differ in the depth flag */
+    int csp = ~(~info->csp ^ change_fmt); /* same value as info->csp ^ change_fmt: toggles the depth flag when they differ */
+    int bit_depth = 8*x264_cli_csp_depth_factor( csp ); /* default used when no option string is given */
+
+    if( opt_string )
+    {
+        static const char *optlist[] = { "bit_depth", NULL };
+        char **opts = x264_split_options( opt_string, optlist );
+
+        if( opts )
+        {
+            char *str_bit_depth = x264_get_option( "bit_depth", opts );
+            bit_depth = x264_otoi( str_bit_depth, -1 );
+
+            ret = bit_depth < 8 || bit_depth > 16;
+            csp = bit_depth > 8 ? csp | X264_CSP_HIGH_DEPTH : csp & ~X264_CSP_HIGH_DEPTH; /* make the depth flag follow the requested depth */
+            change_fmt = (info->csp ^ csp) & X264_CSP_HIGH_DEPTH;
+            x264_free_string_array( opts );
+        }
+        else
+            ret = 1;
+    }
+
+    FAIL_IF_ERROR( bit_depth != BIT_DEPTH, "this build supports only bit depth %d\n", BIT_DEPTH )
+    FAIL_IF_ERROR( ret, "unsupported bit depth conversion.\n" )
+
+    /* only add the filter to the chain if it's needed */
+    if( change_fmt || bit_depth != 8 * x264_cli_csp_depth_factor( csp ) )
+    {
+        FAIL_IF_ERROR( !depth_filter_csp_is_supported(csp), "unsupported colorspace.\n" )
+        depth_hnd_t *h = x264_malloc( sizeof(depth_hnd_t) + (info->width+1)*sizeof(int16_t) ); /* struct plus width+1 dithering errors */
+
+        if( !h )
+            return -1;
+
+        h->error_buf = (int16_t*)(h + 1); /* error row sits directly after the struct */
+        h->dst_csp = csp;
+        h->bit_depth = bit_depth;
+        h->prev_hnd = *handle;
+        h->prev_filter = *filter;
+
+        if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, info->width, info->height ) )
+        {
+            x264_free( h );
+            return -1;
+        }
+
+        *handle = h; /* caller's handle/filter now refer to this depth filter */
+        *filter = depth_filter;
+        info->csp = h->dst_csp; /* report the converted csp in info */
+    }
+
+    return 0;
+}
+
+cli_vid_filter_t depth_filter = { NAME, NULL, init, get_frame, release_frame, free_filter, NULL };
diff --git a/filters/video/internal.c b/filters/video/internal.c
index 444ea1f8..ef096dc2 100644
--- a/filters/video/internal.c
+++ b/filters/video/internal.c
@@ -51,6 +51,7 @@ int x264_cli_pic_copy( cli_pic_t *out, cli_pic_t *in )
     {
         int height = in->img.height * x264_cli_csps[csp].height[i];
         int width =  in->img.width  * x264_cli_csps[csp].width[i];
+        width *= x264_cli_csp_depth_factor( in->img.csp );
         x264_cli_plane_copy( out->img.plane[i], out->img.stride[i], in->img.plane[i],
                              in->img.stride[i], width, height );
     }
diff --git a/filters/video/resize.c b/filters/video/resize.c
index 38077b2d..04b5e73a 100644
--- a/filters/video/resize.c
+++ b/filters/video/resize.c
@@ -79,10 +79,21 @@ static void help( int longhelp )
             "            - fittobox: resizes the video based on the desired contraints\n"
             "               - width, height, both\n"
             "            - fittobox and sar: same as above except with specified sar\n"
-            "            simultaneously converting to the given colorspace\n"
-            "            using resizer method [\"bicubic\"]\n"
-            "             - fastbilinear, bilinear, bicubic, experimental, point,\n"
-            "             - area, bicublin, gauss, sinc, lanczos, spline\n" );
+            "            - csp: convert to the given csp. syntax: [name][:depth]\n"
+            "               - valid csp names [keep current]: " );
+
+    for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
+    {
+        printf( "%s", x264_cli_csps[i].name );
+        if( i+1 < X264_CSP_CLI_MAX )
+            printf( ", " );
+    }
+    printf( "\n"
+            "               - depth: 8 or 16 bits per pixel [keep current]\n"
+            "            note: not all depths are supported by all csps.\n"
+            "            - method: use resizer method [\"bicubic\"]\n"
+            "               - fastbilinear, bilinear, bicubic, experimental, point,\n"
+            "               - area, bicublin, gauss, sinc, lanczos, spline\n" );
 }
 
 static uint32_t convert_cpu_to_flag( uint32_t cpu )
@@ -131,13 +142,15 @@ static int convert_csp_to_pix_fmt( int csp )
         return csp&X264_CSP_MASK;
     switch( csp&X264_CSP_MASK )
     {
-        case X264_CSP_I420: return PIX_FMT_YUV420P;
-        case X264_CSP_I422: return PIX_FMT_YUV422P;
-        case X264_CSP_I444: return PIX_FMT_YUV444P;
-        case X264_CSP_NV12: return PIX_FMT_NV12;
-        case X264_CSP_YV12: return PIX_FMT_YUV420P; /* specially handled via swapping chroma */
-        case X264_CSP_BGR:  return PIX_FMT_BGR24;
-        case X264_CSP_BGRA: return PIX_FMT_BGRA;
+        case X264_CSP_YV12: /* specially handled via swapping chroma */
+        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV420P16 : PIX_FMT_YUV420P;
+        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV422P16 : PIX_FMT_YUV422P;
+        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV444P16 : PIX_FMT_YUV444P;
+        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_RGB48     : PIX_FMT_RGB24;
+        /* the next 3 csps have no equivalent 16bit depth in swscale */
+        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_NV12;
+        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_BGR24;
+        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_BGRA;
         default:            return PIX_FMT_NONE;
     }
 }
@@ -147,23 +160,30 @@ static int pick_closest_supported_csp( int csp )
     int pix_fmt = convert_csp_to_pix_fmt( csp );
     switch( pix_fmt )
     {
+        case PIX_FMT_YUV420P16LE:
+        case PIX_FMT_YUV420P16BE:
+            return X264_CSP_I420 | X264_CSP_HIGH_DEPTH;
         case PIX_FMT_YUV422P:
-        case PIX_FMT_YUV422P16LE:
-        case PIX_FMT_YUV422P16BE:
         case PIX_FMT_YUYV422:
         case PIX_FMT_UYVY422:
             return X264_CSP_I422;
+        case PIX_FMT_YUV422P16LE:
+        case PIX_FMT_YUV422P16BE:
+            return X264_CSP_I422 | X264_CSP_HIGH_DEPTH;
         case PIX_FMT_YUV444P:
+            return X264_CSP_I444;
         case PIX_FMT_YUV444P16LE:
         case PIX_FMT_YUV444P16BE:
-            return X264_CSP_I444;
-        case PIX_FMT_RGB24:    // convert rgb to bgr
-        case PIX_FMT_RGB48BE:
-        case PIX_FMT_RGB48LE:
+            return X264_CSP_I444 | X264_CSP_HIGH_DEPTH;
+        case PIX_FMT_RGB24:
         case PIX_FMT_RGB565BE:
         case PIX_FMT_RGB565LE:
         case PIX_FMT_RGB555BE:
         case PIX_FMT_RGB555LE:
+            return X264_CSP_RGB;
+        case PIX_FMT_RGB48BE:
+        case PIX_FMT_RGB48LE:
+            return X264_CSP_RGB | X264_CSP_HIGH_DEPTH;
         case PIX_FMT_BGR24:
         case PIX_FMT_BGR565BE:
         case PIX_FMT_BGR565LE:
@@ -209,12 +229,27 @@ static int handle_opts( const char **optlist, char **opts, video_info_t *info, r
 
     if( str_csp )
     {
-        /* output csp was specified, lookup against valid values */
+        /* output csp was specified, first check if optional depth was provided */
+        char *str_depth = strchr( str_csp, ':' );
+        int depth = x264_cli_csp_depth_factor( info->csp ) * 8;
+        if( str_depth )
+        {
+            /* csp bit depth was specified */
+            *str_depth++ = '\0';
+            depth = x264_otoi( str_depth, -1 );
+            FAIL_IF_ERROR( depth != 8 && depth != 16, "unsupported bit depth %d\n", depth );
+        }
+        /* now lookup against the list of valid csps */
         int csp;
-        for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
-            csp--;
+        if( strlen( str_csp ) == 0 )
+            csp = info->csp & X264_CSP_MASK;
+        else
+            for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
+                csp--;
         FAIL_IF_ERROR( csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", str_csp );
         h->dst_csp = csp;
+        if( depth == 16 )
+            h->dst_csp |= X264_CSP_HIGH_DEPTH;
     }
 
     /* if the input sar is currently invalid, set it to 1:1 so it can be used in math */
@@ -366,8 +401,17 @@ static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x2
     h->swap_chroma = (info->csp & X264_CSP_MASK) == X264_CSP_YV12;
     int src_pix_fmt = convert_csp_to_pix_fmt( info->csp );
 
+    int src_pix_fmt_inv = convert_csp_to_pix_fmt( info->csp ^ X264_CSP_HIGH_DEPTH );
+    int dst_pix_fmt_inv = convert_csp_to_pix_fmt( h->dst_csp ^ X264_CSP_HIGH_DEPTH );
+
     /* confirm swscale can support this conversion */
+    FAIL_IF_ERROR( src_pix_fmt == PIX_FMT_NONE && src_pix_fmt_inv != PIX_FMT_NONE,
+                   "input colorspace %s with bit depth %d is not supported\n", sws_format_name( src_pix_fmt_inv ),
+                   info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
     FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", sws_format_name( src_pix_fmt ) )
+    FAIL_IF_ERROR( h->dst.pix_fmt == PIX_FMT_NONE && dst_pix_fmt_inv != PIX_FMT_NONE,
+                   "input colorspace %s with bit depth %d is not supported\n", sws_format_name( dst_pix_fmt_inv ),
+                   h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
     FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", sws_format_name( h->dst.pix_fmt ) )
     FAIL_IF_ERROR( h->dst.height != info->height && info->interlaced,
                    "swscale is not compatible with interlaced vertical resizing\n" )
diff --git a/filters/video/video.c b/filters/video/video.c
index 61dc8c6c..71ae01e4 100644
--- a/filters/video/video.c
+++ b/filters/video/video.c
@@ -51,6 +51,7 @@ void x264_register_vid_filters()
     REGISTER_VFILTER( fix_vfr_pts );
     REGISTER_VFILTER( resize );
     REGISTER_VFILTER( select_every );
+    REGISTER_VFILTER( depth );
 #if HAVE_GPL
 #endif
 }
diff --git a/input/input.c b/input/input.c
index 78c7a882..a14bd3cb 100644
--- a/input/input.c
+++ b/input/input.c
@@ -32,7 +32,8 @@ const x264_cli_csp_t x264_cli_csps[] = {
     [X264_CSP_YV12] = { "yv12", 3, { 1, .5, .5 }, { 1, .5, .5 }, 2, 2 },
     [X264_CSP_NV12] = { "nv12", 2, { 1,  1 },     { 1, .5 },     2, 2 },
     [X264_CSP_BGR]  = { "bgr",  1, { 3 },         { 1 },         1, 1 },
-    [X264_CSP_BGRA] = { "bgra", 1, { 4 },         { 1 },         1, 1 }
+    [X264_CSP_BGRA] = { "bgra", 1, { 4 },         { 1 },         1, 1 },
+    [X264_CSP_RGB]  = { "rgb",  1, { 3 },         { 1 },         1, 1 },
 };
 
 int x264_cli_csp_is_invalid( int csp )
@@ -41,6 +42,13 @@ int x264_cli_csp_is_invalid( int csp )
     return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX || csp & X264_CSP_OTHER;
 }
 
+int x264_cli_csp_depth_factor( int csp ) /* bytes per sample: 2 with HIGH_DEPTH set, 1 otherwise, 0 for an invalid csp */
+{
+    if( x264_cli_csp_is_invalid( csp ) )
+        return 0; /* callers that divide by the result must pass a valid csp */
+    return (csp & X264_CSP_HIGH_DEPTH) ? 2 : 1;
+}
+
 uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane )
 {
     int csp_mask = csp & X264_CSP_MASK;
@@ -48,6 +56,7 @@ uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane )
         return 0;
     uint64_t size = (uint64_t)width * height;
     size *= x264_cli_csps[csp_mask].width[plane] * x264_cli_csps[csp_mask].height[plane];
+    size *= x264_cli_csp_depth_factor( csp );
     return size;
 }
 
@@ -78,7 +87,7 @@ int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
          pic->img.plane[i] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, i ) );
          if( !pic->img.plane[i] )
              return -1;
-         pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i];
+         pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i] * x264_cli_csp_depth_factor( csp );
     }
 
     return 0;
diff --git a/input/input.h b/input/input.h
index 972dd9c6..43826d73 100644
--- a/input/input.h
+++ b/input/input.h
@@ -36,6 +36,7 @@ typedef struct
     char *index_file;
     char *resolution;
     char *colorspace;
+    int bit_depth;
     char *timebase;
     int seek;
 } cli_input_opt_t;
@@ -103,8 +104,9 @@ extern cli_input_t input;
 #define X264_CSP_I444          (X264_CSP_MAX+1)  /* yuv 4:4:4 planar    */
 #define X264_CSP_BGR           (X264_CSP_MAX+2)  /* packed bgr 24bits   */
 #define X264_CSP_BGRA          (X264_CSP_MAX+3)  /* packed bgr 32bits   */
-#define X264_CSP_CLI_MAX       (X264_CSP_MAX+4)  /* end of list         */
-#define X264_CSP_OTHER          0x2000           /* non x264 colorspace */
+#define X264_CSP_RGB           (X264_CSP_MAX+4)  /* packed rgb 24bits   */
+#define X264_CSP_CLI_MAX       (X264_CSP_MAX+5)  /* end of list         */
+#define X264_CSP_OTHER          0x4000           /* non x264 colorspace */
 
 typedef struct
 {
@@ -119,6 +121,7 @@ typedef struct
 extern const x264_cli_csp_t x264_cli_csps[];
 
 int      x264_cli_csp_is_invalid( int csp );
+int      x264_cli_csp_depth_factor( int csp );
 int      x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height );
 void     x264_cli_pic_clean( cli_pic_t *pic );
 uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane );
diff --git a/input/raw.c b/input/raw.c
index f5fbed65..9617fb12 100644
--- a/input/raw.c
+++ b/input/raw.c
@@ -34,11 +34,12 @@ typedef struct
     int next_frame;
     uint64_t plane_size[4];
     uint64_t frame_size;
+    int bit_depth;
 } raw_hnd_t;
 
 static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
 {
-    raw_hnd_t *h = malloc( sizeof(raw_hnd_t) );
+    raw_hnd_t *h = calloc( 1, sizeof(raw_hnd_t) );
     if( !h )
         return -1;
 
@@ -61,8 +62,10 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
     else /* default */
         info->csp = X264_CSP_I420;
 
-    h->next_frame = 0;
-    info->vfr     = 0;
+    h->bit_depth = opt->bit_depth;
+    FAIL_IF_ERROR( h->bit_depth < 8 || h->bit_depth > 16, "unsupported bit depth `%d'\n", h->bit_depth );
+    if( h->bit_depth > 8 )
+        info->csp |= X264_CSP_HIGH_DEPTH;
 
     if( !strcmp( psz_filename, "-" ) )
         h->fh = stdin;
@@ -73,11 +76,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
 
     info->thread_safe = 1;
     info->num_frames  = 0;
-    h->frame_size = 0;
-    for( int i = 0; i < x264_cli_csps[info->csp].planes; i++ )
+    info->vfr         = 0;
+
+    const x264_cli_csp_t *csp = x264_cli_get_csp( info->csp );
+    for( int i = 0; i < csp->planes; i++ )
     {
         h->plane_size[i] = x264_cli_pic_plane_size( info->csp, info->width, info->height, i );
         h->frame_size += h->plane_size[i];
+        /* x264_cli_pic_plane_size returns the size in bytes, we need the value in pixels from here on */
+        h->plane_size[i] /= x264_cli_csp_depth_factor( info->csp );
     }
 
     if( x264_is_regular_file( h->fh ) )
@@ -95,8 +102,22 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
 static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h )
 {
     int error = 0;
+    int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp ); /* bytes per sample */
     for( int i = 0; i < pic->img.planes && !error; i++ )
-        error |= fread( pic->img.plane[i], h->plane_size[i], 1, h->fh ) <= 0;
+    {
+        error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i]; /* plane_size[] counts samples; a short read is an error */
+        if( h->bit_depth & 7 ) /* nonzero for depths 9..15; 8 and 16 are left as read */
+        {
+            /* upconvert non 16bit high depth planes to 16bit using the same
+             * algorithm as used in the depth filter. */
+            uint16_t *plane = (uint16_t*)pic->img.plane[i];
+            uint64_t pixel_count = h->plane_size[i];
+            int lshift = 16 - h->bit_depth;
+            int rshift = 2*h->bit_depth - 16;
+            for( uint64_t j = 0; j < pixel_count; j++ )
+                plane[j] = (plane[j] << lshift) + (plane[j] >> rshift); /* high bits repeated into the low bits */
+        }
+    }
     return error;
 }
 
diff --git a/x264.c b/x264.c
index 272c4538..d55247c6 100644
--- a/x264.c
+++ b/x264.c
@@ -214,7 +214,7 @@ static void print_version_info()
 #else
     printf( "using a non-gcc compiler\n" );
 #endif
-    printf( "configuration: --bit-depth=%d\n", BIT_DEPTH );
+    printf( "configuration: --bit-depth=%d\n", x264_bit_depth );
     printf( "x264 license: " );
 #if HAVE_GPL
     printf( "GPL version 2 or later\n" );
@@ -375,7 +375,7 @@ static void Help( x264_param_t *defaults, int longhelp )
 #else
         "no",
 #endif
-        BIT_DEPTH
+        x264_bit_depth
       );
     H0( "Example usage:\n" );
     H0( "\n" );
@@ -697,6 +697,7 @@ static void Help( x264_param_t *defaults, int longhelp )
         "                                  - %s\n", demuxer_names[0], stringify_names( buf, demuxer_names ) );
     H1( "      --input-csp <string>    Specify input colorspace format for raw input\n" );
     print_csp_names( longhelp );
+    H1( "      --input-depth <integer> Specify input bit depth for raw input\n" );
     H1( "      --input-res <intxint>   Specify input resolution (width x height)\n" );
     H1( "      --index <string>        Filename for input index file\n" );
     H0( "      --sar width:height      Specify Sample Aspect Ratio\n" );
@@ -769,7 +770,8 @@ enum {
     OPT_LOG_LEVEL,
     OPT_VIDEO_FILTER,
     OPT_INPUT_RES,
-    OPT_INPUT_CSP
+    OPT_INPUT_CSP,
+    OPT_INPUT_DEPTH
 } OptionsOPT;
 
 static char short_options[] = "8A:B:b:f:hI:i:m:o:p:q:r:t:Vvw";
@@ -921,6 +923,7 @@ static struct option long_options[] =
     { "video-filter", required_argument, NULL, OPT_VIDEO_FILTER },
     { "input-res",   required_argument, NULL, OPT_INPUT_RES },
     { "input-csp",   required_argument, NULL, OPT_INPUT_CSP },
+    { "input-depth", required_argument, NULL, OPT_INPUT_DEPTH },
     {0, 0, 0, 0}
 };
 
@@ -1082,10 +1085,16 @@ static int init_vid_filters( char *sequence, hnd_t *handle, video_info_t *info,
     if( csp > X264_CSP_NONE && csp < X264_CSP_MAX )
         param->i_csp = info->csp;
     else
-        param->i_csp = X264_CSP_I420;
+        param->i_csp = X264_CSP_I420 | ( info->csp & X264_CSP_HIGH_DEPTH );
     if( x264_init_vid_filter( "resize", handle, &filter, info, param, NULL ) )
         return -1;
 
+    char args[20];
+    sprintf( args, "bit_depth=%d", x264_bit_depth );
+
+    if( x264_init_vid_filter( "depth", handle, &filter, info, param, args ) )
+        return -1;
+
     return 0;
 }
 
@@ -1138,6 +1147,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
 
     memset( opt, 0, sizeof(cli_opt_t) );
     memset( &input_opt, 0, sizeof(cli_input_opt_t) );
+    input_opt.bit_depth = 8;
     opt->b_progress = 1;
 
     /* Presets are applied before all other options. */
@@ -1283,6 +1293,9 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
             case OPT_INPUT_CSP:
                 input_opt.colorspace = optarg;
                 break;
+            case OPT_INPUT_DEPTH:
+                input_opt.bit_depth = atoi( optarg );
+                break;
             default:
 generic_option:
             {
diff --git a/x264.h b/x264.h
index ef85b9fb..d8a12565 100644
--- a/x264.h
+++ b/x264.h
@@ -180,7 +180,8 @@ static const char * const x264_open_gop_names[] = { "none", "normal", "bluray",
 #define X264_CSP_YV12           0x0002  /* yvu 4:2:0 planar */
 #define X264_CSP_NV12           0x0003  /* yuv 4:2:0, with one y plane and one packed u+v */
 #define X264_CSP_MAX            0x0004  /* end of list */
-#define X264_CSP_VFLIP          0x1000  /* */
+#define X264_CSP_VFLIP          0x1000  /* the csp is vertically flipped */
+#define X264_CSP_HIGH_DEPTH     0x2000  /* the csp has a depth of 16 bits per pixel component */
 
 /* Slice type */
 #define X264_TYPE_AUTO          0x0000  /* Let x264 choose the right type */
@@ -342,7 +343,7 @@ typedef struct x264_param_t
     {
         int         i_rc_method;    /* X264_RC_* */
 
-        int         i_qp_constant;  /* 0 to (51 + 6*(BIT_DEPTH-8)) */
+        int         i_qp_constant;  /* 0 to (51 + 6*(x264_bit_depth-8)) */
         int         i_qp_min;       /* min allowed QP value */
         int         i_qp_max;       /* max allowed QP value */
         int         i_qp_step;      /* max QP step between frames */
@@ -566,6 +567,15 @@ int     x264_param_apply_profile( x264_param_t *, const char *profile );
  * Picture structures and functions
  ****************************************************************************/
 
+/* x264_bit_depth:
+ *      Specifies the number of bits per pixel that x264 uses. This is also the
+ *      bit depth that x264 encodes in. If this value is > 8, x264 will read
+ *      two bytes of input data for each pixel sample, and expect the upper
+ *      (16-x264_bit_depth) bits to be zero.
+ *      Note: The flag X264_CSP_HIGH_DEPTH must be used to specify the
+ *      colorspace depth as well. */
+extern const int x264_bit_depth;
+
 enum pic_struct_e
 {
     PIC_STRUCT_AUTO              = 0, // automatically decide (default)