Optimize x86 asm for Intel macro-op fusion

[x264] / x264.h
diff --git a/x264.h b/x264.h

index ec9321c294f3bb354119c02091439711351582a7..34ad872c888ac6fd90cf1882bba90f039a1f2432 100644 (file)
--- a/x264.h
+++ b/x264.h
@@ -1,10 +1,11 @@
  /*****************************************************************************
- * x264.h: h264 encoder library
+ * x264.h: x264 public header
   *****************************************************************************
- * Copyright (C) 2003-2008 x264 Project
+ * Copyright (C) 2003-2011 x264 project
   *
   * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   *          Loren Merritt <lorenm@u.washington.edu>
+ *          Fiona Glaser <fiona@x264.com>
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
@@ -19,6 +20,9 @@
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
   *****************************************************************************/
  
  #ifndef X264_X264_H
@@ -35,14 +39,64 @@
  
  #include <stdarg.h>
  
-#define X264_BUILD 76
+#include "x264_config.h"
+
+#define X264_BUILD 118
  
  /* x264_t:
   *      opaque handler for encoder */
  typedef struct x264_t x264_t;
  
  /****************************************************************************
- * Initialisation structure and function.
+ * NAL structure and functions
+ ****************************************************************************/
+
+enum nal_unit_type_e
+{
+    NAL_UNKNOWN     = 0,
+    NAL_SLICE       = 1,
+    NAL_SLICE_DPA   = 2,
+    NAL_SLICE_DPB   = 3,
+    NAL_SLICE_DPC   = 4,
+    NAL_SLICE_IDR   = 5,    /* ref_idc != 0 */
+    NAL_SEI         = 6,    /* ref_idc == 0 */
+    NAL_SPS         = 7,
+    NAL_PPS         = 8,
+    NAL_AUD         = 9,
+    NAL_FILLER      = 12,
+    /* ref_idc == 0 for 6,9,10,11,12 */
+};
+enum nal_priority_e
+{
+    NAL_PRIORITY_DISPOSABLE = 0,
+    NAL_PRIORITY_LOW        = 1,
+    NAL_PRIORITY_HIGH       = 2,
+    NAL_PRIORITY_HIGHEST    = 3,
+};
+
+/* The data within the payload is already NAL-encapsulated; the ref_idc and type
+ * are merely in the struct for easy access by the calling application.
+ * All data returned in an x264_nal_t, including the data in p_payload, is no longer
+ * valid after the next call to x264_encoder_encode.  Thus it must be used or copied
+ * before calling x264_encoder_encode or x264_encoder_headers again. */
+typedef struct
+{
+    int i_ref_idc;  /* nal_priority_e */
+    int i_type;     /* nal_unit_type_e */
+    int b_long_startcode;
+    int i_first_mb; /* If this NAL is a slice, the index of the first MB in the slice. */
+    int i_last_mb;  /* If this NAL is a slice, the index of the last MB in the slice. */
+
+    /* Size of payload in bytes. */
+    int     i_payload;
+    /* If param->b_annexb is set, Annex-B bytestream with startcode.
+     * Otherwise, startcode is replaced with a 4-byte size.
+     * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
+    uint8_t *p_payload;
+} x264_nal_t;
+
+/****************************************************************************
+ * Encoder parameters
   ****************************************************************************/
  /* CPU flags
   */
@@ -50,7 +104,8 @@ typedef struct x264_t x264_t;
  #define X264_CPU_CACHELINE_64   0x000002  /* 32/64 is the size of a cacheline in bytes */
  #define X264_CPU_ALTIVEC        0x000004
  #define X264_CPU_MMX            0x000008
-#define X264_CPU_MMXEXT         0x000010  /* MMX2 aka MMXEXT aka ISSE */
+#define X264_CPU_MMX2           0x000010  /* MMX2 aka MMXEXT aka ISSE */
+#define X264_CPU_MMXEXT    X264_CPU_MMX2
  #define X264_CPU_SSE            0x000020
  #define X264_CPU_SSE2           0x000040
  #define X264_CPU_SSE2_IS_SLOW   0x000080  /* avoid most SSE2 functions on Athlon64 */
@@ -66,6 +121,10 @@ typedef struct x264_t x264_t;
  #define X264_CPU_ARMV6          0x020000
  #define X264_CPU_NEON           0x040000  /* ARM NEON */
  #define X264_CPU_FAST_NEON_MRC  0x080000  /* Transfer from NEON to ARM register is fast (Cortex-A9) */
+#define X264_CPU_SLOW_CTZ       0x100000  /* BSR/BSF x86 instructions are really slow on some CPUs */
+#define X264_CPU_SLOW_ATOM      0x200000  /* The Atom just sucks */
+#define X264_CPU_AVX            0x400000  /* AVX support: requires OS support even if YMM registers
+                                           * aren't used. */
  
  /* Analyse flags
   */
@@ -89,36 +148,50 @@ typedef struct x264_t x264_t;
  #define X264_RC_CQP                  0
  #define X264_RC_CRF                  1
  #define X264_RC_ABR                  2
+#define X264_QP_AUTO                 0
  #define X264_AQ_NONE                 0
  #define X264_AQ_VARIANCE             1
  #define X264_AQ_AUTOVARIANCE         2
  #define X264_B_ADAPT_NONE            0
  #define X264_B_ADAPT_FAST            1
  #define X264_B_ADAPT_TRELLIS         2
+#define X264_WEIGHTP_NONE            0
+#define X264_WEIGHTP_SIMPLE          1
+#define X264_WEIGHTP_SMART           2
+#define X264_B_PYRAMID_NONE          0
+#define X264_B_PYRAMID_STRICT        1
+#define X264_B_PYRAMID_NORMAL        2
+#define X264_KEYINT_MIN_AUTO         0
+#define X264_KEYINT_MAX_INFINITE     (1<<30)
  
  static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
  static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
+static const char * const x264_b_pyramid_names[] = { "none", "strict", "normal", 0 };
  static const char * const x264_overscan_names[] = { "undef", "show", "crop", 0 };
  static const char * const x264_vidformat_names[] = { "component", "pal", "ntsc", "secam", "mac", "undef", 0 };
  static const char * const x264_fullrange_names[] = { "off", "on", 0 };
  static const char * const x264_colorprim_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "film", 0 };
  static const char * const x264_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100", "log316", 0 };
  static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m", "smpte240m", "YCgCo", 0 };
+static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
  
-/* Colorspace type
- * legacy only; nothing other than I420 is really supported. */
+/* Colorspace type */
  #define X264_CSP_MASK           0x00ff  /* */
  #define X264_CSP_NONE           0x0000  /* Invalid mode     */
  #define X264_CSP_I420           0x0001  /* yuv 4:2:0 planar */
-#define X264_CSP_I422           0x0002  /* yuv 4:2:2 planar */
-#define X264_CSP_I444           0x0003  /* yuv 4:4:4 planar */
-#define X264_CSP_YV12           0x0004  /* yuv 4:2:0 planar */
-#define X264_CSP_YUYV           0x0005  /* yuv 4:2:2 packed */
-#define X264_CSP_RGB            0x0006  /* rgb 24bits       */
-#define X264_CSP_BGR            0x0007  /* bgr 24bits       */
-#define X264_CSP_BGRA           0x0008  /* bgr 32bits       */
-#define X264_CSP_MAX            0x0009  /* end of list */
-#define X264_CSP_VFLIP          0x1000  /* */
+#define X264_CSP_YV12           0x0002  /* yvu 4:2:0 planar */
+#define X264_CSP_NV12           0x0003  /* yuv 4:2:0, with one y plane and one packed u+v */
+#define X264_CSP_I422           0x0004  /* yuv 4:2:2 planar */
+#define X264_CSP_YV16           0x0005  /* yvu 4:2:2 planar */
+#define X264_CSP_NV16           0x0006  /* yuv 4:2:2, with one y plane and one packed u+v */
+#define X264_CSP_I444           0x0007  /* yuv 4:4:4 planar */
+#define X264_CSP_YV24           0x0008  /* yvu 4:4:4 planar */
+#define X264_CSP_BGR            0x0009  /* packed bgr 24bits   */
+#define X264_CSP_BGRA           0x000a  /* packed bgr 32bits   */
+#define X264_CSP_RGB            0x000b  /* packed rgb 24bits   */
+#define X264_CSP_MAX            0x000c  /* end of list */
+#define X264_CSP_VFLIP          0x1000  /* the csp is vertically flipped */
+#define X264_CSP_HIGH_DEPTH     0x2000  /* the csp has a depth of 16 bits per pixel component */
  
  /* Slice type */
  #define X264_TYPE_AUTO          0x0000  /* Let x264 choose the right type */
@@ -127,6 +200,7 @@ static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", ""
  #define X264_TYPE_P             0x0003
  #define X264_TYPE_BREF          0x0004  /* Non-disposable B-frame */
  #define X264_TYPE_B             0x0005
+#define X264_TYPE_KEYFRAME      0x0006  /* IDR or I depending on b_open_gop option */
  #define IS_X264_TYPE_I(x) ((x)==X264_TYPE_I || (x)==X264_TYPE_IDR)
  #define IS_X264_TYPE_B(x) ((x)==X264_TYPE_B || (x)==X264_TYPE_BREF)
  
@@ -141,6 +215,11 @@ static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", ""
  #define X264_THREADS_AUTO 0 /* Automatically select optimal number of threads */
  #define X264_SYNC_LOOKAHEAD_AUTO (-1) /* Automatically select optimal lookahead thread buffer size */
  
+/* HRD */
+#define X264_NAL_HRD_NONE            0
+#define X264_NAL_HRD_VBR             1
+#define X264_NAL_HRD_CBR             2
+
  /* Zones: override ratecontrol or other options for specific sections of the video.
   * See x264_encoder_reconfig() for which options can be changed.
   * If zones overlap, whichever comes later in the list takes precedence. */
@@ -158,16 +237,26 @@ typedef struct x264_param_t
      /* CPU flags */
      unsigned int cpu;
      int         i_threads;       /* encode multiple frames in parallel */
+    int         b_sliced_threads;  /* Whether to use slice-based threading. */
      int         b_deterministic; /* whether to allow non-deterministic optimizations when threaded */
+    int         b_cpu_independent; /* force canonical behavior rather than cpu-dependent optimal algorithms */
      int         i_sync_lookahead; /* threaded lookahead buffer */
  
      /* Video Properties */
      int         i_width;
      int         i_height;
-    int         i_csp;  /* CSP of encoded bitstream, only i420 supported */
+    int         i_csp;         /* CSP of encoded bitstream */
      int         i_level_idc;
      int         i_frame_total; /* number of frames to encode if known, else 0 */
  
+    /* NAL HRD
+     * Uses Buffering and Picture Timing SEIs to signal HRD
+     * The HRD in H.264 was not designed with VFR in mind.
+     * It is therefore not recommended to use NAL HRD with VFR.
+     * Furthermore, reconfiguring the VBV (via x264_encoder_reconfig)
+     * will currently generate invalid HRD. */
+    int         i_nal_hrd;
+
      struct
      {
          /* they will be reduced to be 0 < x <= 65535 and prime */
@@ -185,18 +274,21 @@ typedef struct x264_param_t
          int         i_chroma_loc;    /* both top & bottom */
      } vui;
  
-    int         i_fps_num;
-    int         i_fps_den;
-
      /* Bitstream parameters */
      int         i_frame_reference;  /* Maximum number of reference frames */
+    int         i_dpb_size;         /* Force a DPB size larger than that implied by B-frames and reference frames.
+                                     * Useful in combination with interactive error resilience. */
      int         i_keyint_max;       /* Force an IDR keyframe at this interval */
      int         i_keyint_min;       /* Scenecuts closer together than this are coded as I, not IDR. */
      int         i_scenecut_threshold; /* how aggressively to insert extra I frames */
+    int         b_intra_refresh;    /* Whether or not to use periodic intra refresh instead of IDR frames. */
+
      int         i_bframe;   /* how many b-frame between 2 references pictures */
      int         i_bframe_adaptive;
      int         i_bframe_bias;
-    int         b_bframe_pyramid;   /* Keep some B-frames as references */
+    int         i_bframe_pyramid;   /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
+    int         b_open_gop;
+    int         b_bluray_compat;
  
      int         b_deblocking_filter;
      int         i_deblocking_filter_alphac0;    /* [-6, 6] -6 light filter, 6 strong */
@@ -206,15 +298,18 @@ typedef struct x264_param_t
      int         i_cabac_init_idc;
  
      int         b_interlaced;
+    int         b_constrained_intra;
  
      int         i_cqm_preset;
      char        *psz_cqm_file;      /* JM format */
      uint8_t     cqm_4iy[16];        /* used only if i_cqm_preset == X264_CQM_CUSTOM */
-    uint8_t     cqm_4ic[16];
      uint8_t     cqm_4py[16];
+    uint8_t     cqm_4ic[16];
      uint8_t     cqm_4pc[16];
      uint8_t     cqm_8iy[64];
      uint8_t     cqm_8py[64];
+    uint8_t     cqm_8ic[64];
+    uint8_t     cqm_8pc[64];
  
      /* Log */
      void        (*pf_log)( void *, int i_level, const char *psz, va_list );
@@ -230,6 +325,7 @@ typedef struct x264_param_t
          unsigned int inter;     /* inter partitions */
  
          int          b_transform_8x8;
+        int          i_weighted_pred; /* weighting for P-frames */
          int          b_weighted_bipred; /* implicit weighting for B-frames */
          int          i_direct_mv_pred; /* spatial vs temporal mv prediction */
          int          i_chroma_qp_offset;
@@ -240,7 +336,7 @@ typedef struct x264_param_t
          int          i_mv_range_thread; /* minimum space between threads. -1 = auto, based on number of threads. */
          int          i_subpel_refine; /* subpixel motion estimation quality */
          int          b_chroma_me; /* chroma ME for subpel and mode decision in P-frames */
-        int          b_mixed_references; /* allow each mb partition in P-frames to have it's own reference number */
+        int          b_mixed_references; /* allow each mb partition to have its own reference number */
          int          i_trellis;  /* trellis RD quantization */
          int          b_fast_pskip; /* early SKIP detection on P-frames */
          int          b_dct_decimate; /* transform coefficient thresholding on P-frames */
@@ -261,13 +357,14 @@ typedef struct x264_param_t
      {
          int         i_rc_method;    /* X264_RC_* */
  
-        int         i_qp_constant;  /* 0-51 */
+        int         i_qp_constant;  /* 0 to (51 + 6*(x264_bit_depth-8)). 0=lossless */
          int         i_qp_min;       /* min allowed QP value */
          int         i_qp_max;       /* max allowed QP value */
          int         i_qp_step;      /* max QP step between frames */
  
          int         i_bitrate;
          float       f_rf_constant;  /* 1pass VBR, nominal QP */
+        float       f_rf_constant_max;  /* In CRF mode, maximum CRF as caused by VBV */
          float       f_rate_tolerance;
          int         i_vbv_max_bitrate;
          int         i_vbv_buffer_size;
@@ -295,12 +392,54 @@ typedef struct x264_param_t
          char        *psz_zones;     /* alternate method of specifying zones */
      } rc;
  
+    /* Cropping Rectangle parameters: added to those implicitly defined by
+       non-mod16 video resolutions. */
+    struct
+    {
+        unsigned int i_left;
+        unsigned int i_top;
+        unsigned int i_right;
+        unsigned int i_bottom;
+    } crop_rect;
+
+    /* frame packing arrangement flag */
+    int i_frame_packing;
+
      /* Muxing parameters */
      int b_aud;                  /* generate access unit delimiters */
      int b_repeat_headers;       /* put SPS/PPS before each keyframe */
      int b_annexb;               /* if set, place start codes (4 bytes) before NAL units,
                                   * otherwise place size (4 bytes) before NAL units. */
      int i_sps_id;               /* SPS and PPS id number */
+    int b_vfr_input;            /* VFR input.  If 1, use timebase and timestamps for ratecontrol purposes.
+                                 * If 0, use fps only. */
+    int b_pulldown;             /* use explicitly set timebase for CFR */
+    uint32_t i_fps_num;
+    uint32_t i_fps_den;
+    uint32_t i_timebase_num;    /* Timebase numerator */
+    uint32_t i_timebase_den;    /* Timebase denominator */
+
+    int b_tff;
+
+    /* Pulldown:
+     * The correct pic_struct must be passed with each input frame.
+     * The input timebase should be the timebase corresponding to the output framerate. This should be constant.
+     * e.g. for 3:2 pulldown timebase should be 1001/30000
+     * The PTS passed with each frame must be the PTS of the frame after pulldown is applied.
+     * Frame doubling and tripling require b_vfr_input set to zero (see H.264 Table D-1)
+     *
+     * Pulldown changes are not clearly defined in H.264. Therefore, it is the calling app's responsibility to manage this.
+     */
+
+    int b_pic_struct;
+
+    /* Fake Interlaced.
+     *
+     * Used only when b_interlaced=0. Setting this flag makes it possible to flag the stream as PAFF interlaced yet
+     * encode all frames progressively. It is useful for encoding 25p and 30p Blu-Ray streams.
+     */
+
+    int b_fake_interlaced;
  
      /* Slicing parameters */
      int i_slice_max_size;    /* Max size per slice in bytes; includes estimated NAL overhead. */
@@ -312,9 +451,47 @@ typedef struct x264_param_t
       * i.e. when an x264_param_t is passed to x264_t in an x264_picture_t or in zones.
       * Not used when x264_encoder_reconfig is called directly. */
      void (*param_free)( void* );
+
+    /* Optional low-level callback for low-latency encoding.  Called for each output NAL unit
+     * immediately after the NAL unit is finished encoding.  This allows the calling application
+     * to begin processing video data (e.g. by sending packets over a network) before the frame
+     * is done encoding.
+     *
+     * This callback MUST do the following in order to work correctly:
+     * 1) Have available an output buffer of at least size nal->i_payload*3/2 + 5 + 16.
+     * 2) Call x264_nal_encode( h, dst, nal ), where dst is the output buffer.
+     * After these steps, the content of nal is valid and can be used in the same way as if
+     * the NAL unit were output by x264_encoder_encode.
+     *
+     * This does not need to be synchronous with the encoding process: the data pointed to
+     * by nal (both before and after x264_nal_encode) will remain valid until the next
+     * x264_encoder_encode call.  The callback must be re-entrant.
+     *
+     * This callback does not work with frame-based threads; threads must be disabled
+     * or sliced-threads enabled.  This callback also does not work as one would expect
+     * with HRD -- since the buffering period SEI cannot be calculated until the frame
+     * is finished encoding, it will not be sent via this callback.
+     *
+     * Note also that the NALs are not necessarily returned in order when sliced threads is
+     * enabled.  Accordingly, the variables i_first_mb and i_last_mb are available in
+     * x264_nal_t to help the calling application reorder the slices if necessary.
+     *
+     * When this callback is enabled, x264_encoder_encode does not return valid NALs;
+     * the calling application is expected to acquire all output NALs through the callback.
+     *
+     * It is generally sensible to combine this callback with a use of slice-max-mbs or
+     * slice-max-size. */
+    void (*nalu_process) ( x264_t *h, x264_nal_t *nal );
  } x264_param_t;
  
-typedef struct {
+void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
+
+/****************************************************************************
+ * H.264 level restriction information
+ ****************************************************************************/
+
+typedef struct
+{
      int level_idc;
      int mbps;        /* max macroblock processing rate (macroblocks/sec) */
      int frame_size;  /* max frame size (macroblocks) */
@@ -324,6 +501,7 @@ typedef struct {
      int mv_range;    /* max vertical mv component range (pixels) */
      int mvs_per_2mb; /* max mvs per 2 consecutive mbs. */
      int slice_rate;  /* ?? */
+    int mincr;       /* min compression ratio */
      int bipred8x8;   /* limit bipred to >=8x8 */
      int direct8x8;   /* limit b_direct to >=8x8 */
      int frame_only;  /* forbid interlacing */
@@ -332,6 +510,10 @@ typedef struct {
  /* all of the levels defined in the standard, terminated by .level_idc=0 */
  extern const x264_level_t x264_levels[];
  
+/****************************************************************************
+ * Basic parameter handling functions
+ ****************************************************************************/
+
  /* x264_param_default:
   *      fill x264_param_t with default values and do CPU detection */
  void    x264_param_default( x264_param_t * );
@@ -348,17 +530,153 @@ void    x264_param_default( x264_param_t * );
  int x264_param_parse( x264_param_t *, const char *name, const char *value );
  
  /****************************************************************************
- * Picture structures and functions.
+ * Advanced parameter handling functions
+ ****************************************************************************/
+
+/* These functions expose the full power of x264's preset-tune-profile system for
+ * easy adjustment of large numbers of internal parameters.
+ *
+ * In order to replicate x264CLI's option handling, these functions MUST be called
+ * in the following order:
+ * 1) x264_param_default_preset
+ * 2) Custom user options (via param_parse or directly assigned variables)
+ * 3) x264_param_apply_fastfirstpass
+ * 4) x264_param_apply_profile
+ *
+ * Additionally, x264CLI does not apply step 3 if the preset chosen is "placebo"
+ * or --slow-firstpass is set. */
+
+/* x264_param_default_preset:
+ *      The same as x264_param_default, but also use the passed preset and tune
+ *      to modify the default settings.
+ *      (either can be NULL, which implies no preset or no tune, respectively)
+ *
+ *      Currently available presets are, ordered from fastest to slowest: */
+static const char * const x264_preset_names[] = { "ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow", "placebo", 0 };
+
+/*      The presets can also be indexed numerically, as in:
+ *      x264_param_default_preset( &param, "3", ... )
+ *      with ultrafast mapping to "0" and placebo mapping to "9".  This mapping may
+ *      of course change if new presets are added in between, but will always be
+ *      ordered from fastest to slowest.
+ *
+ *      Warning: the speed of these presets scales dramatically.  Ultrafast is a full
+ *      100 times faster than placebo!
+ *
+ *      Currently available tunings are: */
+static const char * const x264_tune_names[] = { "film", "animation", "grain", "stillimage", "psnr", "ssim", "fastdecode", "zerolatency", 0 };
+
+/*      Multiple tunings can be used if separated by a delimiter in ",./-+",
+ *      however multiple psy tunings cannot be used.
+ *      film, animation, grain, stillimage, psnr, and ssim are psy tunings.
+ *
+ *      returns 0 on success, negative on failure (e.g. invalid preset/tune name). */
+int     x264_param_default_preset( x264_param_t *, const char *preset, const char *tune );
+
+/* x264_param_apply_fastfirstpass:
+ *      If first-pass mode is set (rc.b_stat_read == 0, rc.b_stat_write == 1),
+ *      modify the encoder settings to disable options generally not useful on
+ *      the first pass. */
+void    x264_param_apply_fastfirstpass( x264_param_t * );
+
+/* x264_param_apply_profile:
+ *      Applies the restrictions of the given profile.
+ *      Currently available profiles are, from most to least restrictive: */
+static const char * const x264_profile_names[] = { "baseline", "main", "high", "high10", "high422", "high444", 0 };
+
+/*      (can be NULL, in which case the function will do nothing)
+ *
+ *      Does NOT guarantee that the given profile will be used: if the restrictions
+ *      of "High" are applied to settings that are already Baseline-compatible, the
+ *      stream will remain baseline.  In short, it does not increase settings, only
+ *      decrease them.
+ *
+ *      returns 0 on success, negative on failure (e.g. invalid profile name). */
+int     x264_param_apply_profile( x264_param_t *, const char *profile );
+
+/****************************************************************************
+ * Picture structures and functions
   ****************************************************************************/
+
+/* x264_bit_depth:
+ *      Specifies the number of bits per pixel that x264 uses. This is also the
+ *      bit depth that x264 encodes in. If this value is > 8, x264 will read
+ *      two bytes of input data for each pixel sample, and expect the upper
+ *      (16-x264_bit_depth) bits to be zero.
+ *      Note: The flag X264_CSP_HIGH_DEPTH must be used to specify the
+ *      colorspace depth as well. */
+extern const int x264_bit_depth;
+
+enum pic_struct_e
+{
+    PIC_STRUCT_AUTO              = 0, // automatically decide (default)
+    PIC_STRUCT_PROGRESSIVE       = 1, // progressive frame
+    // "TOP" and "BOTTOM" are not supported in x264 (PAFF only)
+    PIC_STRUCT_TOP_BOTTOM        = 4, // top field followed by bottom
+    PIC_STRUCT_BOTTOM_TOP        = 5, // bottom field followed by top
+    PIC_STRUCT_TOP_BOTTOM_TOP    = 6, // top field, bottom field, top field repeated
+    PIC_STRUCT_BOTTOM_TOP_BOTTOM = 7, // bottom field, top field, bottom field repeated
+    PIC_STRUCT_DOUBLE            = 8, // double frame
+    PIC_STRUCT_TRIPLE            = 9, // triple frame
+};
+
+typedef struct
+{
+    double cpb_initial_arrival_time;
+    double cpb_final_arrival_time;
+    double cpb_removal_time;
+
+    double dpb_output_time;
+} x264_hrd_t;
+
+/* Arbitrary user SEI:
+ * Payload size is in bytes and the payload pointer must be valid.
+ * Payload types and syntax can be found in Annex D of the H.264 Specification.
+ * SEI payload alignment bits as described in Annex D must be included at the
+ * end of the payload if needed.
+ * The payload should not be NAL-encapsulated.
+ * Payloads are written first in order of input, apart from in the case when HRD
+ * is enabled where payloads are written after the Buffering Period SEI. */
+
+typedef struct
+{
+    int payload_size;
+    int payload_type;
+    uint8_t *payload;
+} x264_sei_payload_t;
+
  typedef struct
  {
-    int     i_csp;
+    int num_payloads;
+    x264_sei_payload_t *payloads;
+    /* In: optional callback to free each payload AND x264_sei_payload_t when used. */
+    void (*sei_free)( void* );
+} x264_sei_t;
  
-    int     i_plane;
-    int     i_stride[4];
-    uint8_t *plane[4];
+typedef struct
+{
+    int     i_csp;       /* Colorspace */
+    int     i_plane;     /* Number of image planes */
+    int     i_stride[4]; /* Strides for each plane */
+    uint8_t *plane[4];   /* Pointers to each plane */
  } x264_image_t;
  
+typedef struct
+{
+    /* In: an array of quantizer offsets to be applied to this image during encoding.
+     *     These are added on top of the decisions made by x264.
+     *     Offsets can be fractional; they are added before QPs are rounded to integer.
+     *     Adaptive quantization must be enabled to use this feature.  Behavior if quant
+     *     offsets differ between encoding passes is undefined.
+     *
+     *     Array contains one offset per macroblock, in raster scan order.  In interlaced
+     *     mode, top-field MBs and bottom-field MBs are interleaved at the row level. */
+    float *quant_offsets;
+    /* In: optional callback to free quant_offsets when used.
+     *     Useful if one wants to use a different quant_offset array for each frame. */
+    void (*quant_offsets_free)( void* );
+} x264_image_properties_t;
+
  typedef struct
  {
      /* In: force picture type (if not auto)
@@ -368,10 +686,20 @@ typedef struct
       *     mixing of auto and forced frametypes is done.
       * Out: type of the picture encoded */
      int     i_type;
-    /* In: force quantizer for > 0 */
+    /* In: force quantizer for != X264_QP_AUTO */
      int     i_qpplus1;
+    /* In: pic_struct, for pulldown/doubling/etc...used only if b_pic_struct=1.
+     *     use pic_struct_e for pic_struct inputs
+     * Out: pic_struct element associated with frame */
+    int     i_pic_struct;
+    /* Out: whether this frame is a keyframe.  Important when using modes that result in
+     * SEI recovery points being used instead of IDR frames. */
+    int     b_keyframe;
      /* In: user pts, Out: pts of encoded picture (user)*/
      int64_t i_pts;
+    /* Out: frame dts. When the pts of the first frame is close to zero,
+     *      initial frames may have a negative dts which must be dealt with by any muxer */
+    int64_t i_dts;
      /* In: custom encoding parameters to be set from this frame forwards
             (in coded order, not display order). If NULL, continue using
             parameters from the previous frame.  Some parameters, such as
@@ -379,14 +707,27 @@ typedef struct
             of H.264 itself; in this case, the caller must force an IDR frame
             if it needs the changed parameter to apply immediately. */
      x264_param_t *param;
-
      /* In: raw data */
      x264_image_t img;
+    /* In: optional information to modify encoder decisions for this frame */
+    x264_image_properties_t prop;
+    /* Out: HRD timing information. Output only when i_nal_hrd is set. */
+    x264_hrd_t hrd_timing;
+    /* In: arbitrary user SEI (e.g. subtitles, AFDs) */
+    x264_sei_t extra_sei;
+    /* private user data. libx264 doesn't touch this,
+       not even copy it from input to output frames. */
+    void *opaque;
  } x264_picture_t;
  
+/* x264_picture_init:
+ *  initialize an x264_picture_t.  Needs to be done if the calling application
+ *  allocates its own x264_picture_t as opposed to using x264_picture_alloc. */
+void x264_picture_init( x264_picture_t *pic );
+
  /* x264_picture_alloc:
   *  alloc data for a picture. You must call x264_picture_clean on it.
- *  returns 0 on success, or -1 on malloc failure. */
+ *  returns 0 on success, or -1 on malloc failure or invalid colorspace. */
  int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height );
  
  /* x264_picture_clean:
@@ -395,51 +736,7 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
  void x264_picture_clean( x264_picture_t *pic );
  
  /****************************************************************************
- * NAL structure and functions:
- ****************************************************************************/
-/* nal */
-enum nal_unit_type_e
-{
-    NAL_UNKNOWN = 0,
-    NAL_SLICE   = 1,
-    NAL_SLICE_DPA   = 2,
-    NAL_SLICE_DPB   = 3,
-    NAL_SLICE_DPC   = 4,
-    NAL_SLICE_IDR   = 5,    /* ref_idc != 0 */
-    NAL_SEI         = 6,    /* ref_idc == 0 */
-    NAL_SPS         = 7,
-    NAL_PPS         = 8,
-    NAL_AUD         = 9,
-    /* ref_idc == 0 for 6,9,10,11,12 */
-};
-enum nal_priority_e
-{
-    NAL_PRIORITY_DISPOSABLE = 0,
-    NAL_PRIORITY_LOW        = 1,
-    NAL_PRIORITY_HIGH       = 2,
-    NAL_PRIORITY_HIGHEST    = 3,
-};
-
-/* The data within the payload is already NAL-encapsulated; the ref_idc and type
- * are merely in the struct for easy access by the calling application.
- * All data returned in an x264_nal_t, including the data in p_payload, is no longer
- * valid after the next call to x264_encoder_encode.  Thus it must be used or copied
- * before calling x264_encoder_encode or x264_encoder_headers again. */
-typedef struct
-{
-    int i_ref_idc;  /* nal_priority_e */
-    int i_type;     /* nal_unit_type_e */
-
-    /* Size of payload in bytes. */
-    int     i_payload;
-    /* If param->b_annexb is set, Annex-B bytestream with 4-byte startcode.
-     * Otherwise, startcode is replaced with a 4-byte size.
-     * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
-    uint8_t *p_payload;
-} x264_nal_t;
-
-/****************************************************************************
- * Encoder functions:
+ * Encoder functions
   ****************************************************************************/
  
  /* Force a link error in the case of linking against an incompatible API version.
@@ -454,24 +751,33 @@ typedef struct
  x264_t *x264_encoder_open( x264_param_t * );
  
  /* x264_encoder_reconfig:
- *      analysis-related parameters from x264_param_t are copied.
+ *      various parameters from x264_param_t are copied.
   *      this takes effect immediately, on whichever frame is encoded next;
   *      due to delay, this may not be the next frame passed to encoder_encode.
   *      if the change should apply to some particular frame, use x264_picture_t->param instead.
- *      returns 0 on success, negative on parameter validation error. */
+ *      returns 0 on success, negative on parameter validation error.
+ *      not all parameters can be changed; see the function's implementation for a detailed breakdown. */
  int     x264_encoder_reconfig( x264_t *, x264_param_t * );
+/* x264_encoder_parameters:
+ *      copies the current internal set of parameters to the pointer provided
+ *      by the caller.  useful when the calling application needs to know
+ *      how x264_encoder_open has changed the parameters, or the current state
+ *      of the encoder after multiple x264_encoder_reconfig calls.
+ *      note that the data accessible through pointers in the returned param struct
+ *      (e.g. filenames) should not be modified by the calling application. */
+void    x264_encoder_parameters( x264_t *, x264_param_t * );
  /* x264_encoder_headers:
   *      return the SPS and PPS that will be used for the whole stream.
- *      if i_nal > 0, returns the total size of all NAL payloads.
+ *      *pi_nal is the number of NAL units output in pp_nal.
   *      returns negative on error.
   *      the payloads of all output NALs are guaranteed to be sequential in memory. */
-int     x264_encoder_headers( x264_t *, x264_nal_t **, int * );
+int     x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
  /* x264_encoder_encode:
   *      encode one picture.
- *      if i_nal > 0, returns the total size of all NAL payloads.
+ *      *pi_nal is the number of NAL units output in pp_nal.
   *      returns negative on error, zero if no NAL units returned.
   *      the payloads of all output NALs are guaranteed to be sequential in memory. */
-int     x264_encoder_encode ( x264_t *, x264_nal_t **, int *, x264_picture_t *, x264_picture_t * );
+int     x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
  /* x264_encoder_close:
   *      close an encoder handler */
  void    x264_encoder_close  ( x264_t * );
@@ -479,5 +785,46 @@ void    x264_encoder_close  ( x264_t * );
   *      return the number of currently delayed (buffered) frames
   *      this should be used at the end of the stream, to know when you have all the encoded frames. */
  int     x264_encoder_delayed_frames( x264_t * );
+/* x264_encoder_maximum_delayed_frames:
+ *      return the maximum number of delayed (buffered) frames that can occur with the current
+ *      parameters. */
+int     x264_encoder_maximum_delayed_frames( x264_t *h );
+/* x264_encoder_intra_refresh:
+ *      If an intra refresh is not in progress, begin one with the next P-frame.
+ *      If an intra refresh is in progress, begin one as soon as the current one finishes.
+ *      Requires that b_intra_refresh be set.
+ *
+ *      Useful for interactive streaming where the client can tell the server that packet loss has
+ *      occurred.  In this case, keyint can be set to an extremely high value so that intra refreshes
+ *      only occur when calling x264_encoder_intra_refresh.
+ *
+ *      In multi-pass encoding, if x264_encoder_intra_refresh is called differently in each pass,
+ *      behavior is undefined.
+ *
+ *      Should not be called during an x264_encoder_encode. */
+void    x264_encoder_intra_refresh( x264_t * );
+/* x264_encoder_invalidate_reference:
+ *      An interactive error resilience tool, designed for use in a low-latency one-encoder-few-clients
+ *      system.  When the client has packet loss or otherwise incorrectly decodes a frame, the encoder
+ *      can be told with this command to "forget" the frame and all frames that depend on it, referencing
+ *      only frames that occurred before the loss.  This will force a keyframe if no frames are left to
+ *      reference after the aforementioned "forgetting".
+ *
+ *      It is strongly recommended to use a large i_dpb_size in this case, which allows the encoder to
+ *      keep around extra, older frames to fall back on in case more recent frames are all invalidated.
+ *      Unlike increasing i_frame_reference, this does not increase the number of frames used for motion
+ *      estimation and thus has no speed impact.  It is also recommended to set a very large keyframe
+ *      interval, so that keyframes are not used except as necessary for error recovery.
+ *
+ *      x264_encoder_invalidate_reference is not currently compatible with the use of B-frames or intra
+ *      refresh.
+ *
+ *      In multi-pass encoding, if x264_encoder_invalidate_reference is called differently in each pass,
+ *      behavior is undefined.
+ *
+ *      Should not be called during an x264_encoder_encode, but multiple calls can be made simultaneously.
+ *
+ *      Returns 0 on success, negative on failure. */
+int x264_encoder_invalidate_reference( x264_t *, int64_t pts );
  
  #endif