copy current macroblock to a smaller buffer, to improve cache coherency and reduce...

[x264] / encoder / encoder.c
diff --git a/encoder/encoder.c b/encoder/encoder.c

index 3ca83ea7ef21085ba0d5102702916ce12ec48606..3adaa312ac967b7ce935a737801b55a4b8225f58 100644 (file)
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -21,7 +21,6 @@
   * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
   *****************************************************************************/
  
-#include <stdlib.h>
  #include <stdio.h>
  #include <string.h>
  #include <math.h>
@@ -87,29 +86,6 @@ static int64_t i_mtime_filter = 0;
   ******************************* x264 libs **********************************
   *
   ****************************************************************************/
-static int64_t x264_sqe( x264_t *h, uint8_t *pix1, int i_pix_stride, uint8_t *pix2, int i_pix2_stride, int i_width, int i_height )
-{
-    int64_t i_sqe = 0;
-    int x, y;
-
-#define SSD(size) i_sqe += h->pixf.ssd[size]( pix1+y*i_pix_stride+x, i_pix_stride, \
-                                              pix2+y*i_pix2_stride+x, i_pix2_stride );
-    for( y = 0; y < i_height-15; y += 16 )
-    {
-        for( x = 0; x < i_width-15; x += 16 )
-            SSD(PIXEL_16x16);
-        if( x < i_width-7 )
-            SSD(PIXEL_8x16);
-    }
-    if( y < i_height-7 )
-        for( x = 0; x < i_width-7; x += 8 )
-            SSD(PIXEL_8x8);
-#undef SSD
-    x264_cpu_restore( h->param.cpu );
-
-    return i_sqe;
-}
-
  static float x264_psnr( int64_t i_sqe, int64_t i_size )
  {
      double f_mse = (double)i_sqe / ((double)65025.0 * (double)i_size);
@@ -324,16 +300,6 @@ static void x264_slice_header_write( bs_t *s, x264_slice_header_t *sh, int i_nal
          bs_write_ue( s, sh->i_cabac_init_idc );
      }
      bs_write_se( s, sh->i_qp_delta );      /* slice qp delta */
-#if 0
-    if( sh->i_type == SLICE_TYPE_SP || sh->i_type == SLICE_TYPE_SI )
-    {
-        if( sh->i_type == SLICE_TYPE_SP )
-        {
-            bs_write1( s, sh->b_sp_for_swidth );
-        }
-        bs_write_se( s, sh->i_qs_delta );
-    }
-#endif
  
      if( sh->pps->b_deblocking_filter_control )
      {
@@ -363,9 +329,9 @@ static int x264_validate_parameters( x264_t *h )
          return -1;
      }
  
-    if( h->param.i_width % 16 != 0 || h->param.i_height % 16 != 0 )
+    if( h->param.i_width % 2 || h->param.i_height % 2 )
      {
-        x264_log( h, X264_LOG_ERROR, "width %% 16 != 0 or height %% 16 != 0 (%dx%d)\n",
+        x264_log( h, X264_LOG_ERROR, "width or height not divisible by 2 (%dx%d)\n",
                    h->param.i_width, h->param.i_height );
          return -1;
      }
@@ -376,7 +342,7 @@ static int x264_validate_parameters( x264_t *h )
      }
  
      h->param.i_threads = x264_clip3( h->param.i_threads, 1, X264_SLICE_MAX );
-    h->param.i_threads = X264_MIN( h->param.i_threads, h->param.i_height / 16 );
+    h->param.i_threads = X264_MIN( h->param.i_threads, (h->param.i_height + 15) / 16 );
  #if !(HAVE_PTHREAD)
      if( h->param.i_threads > 1 )
      {
@@ -385,6 +351,12 @@ static int x264_validate_parameters( x264_t *h )
      }
  #endif
  
+    if( h->param.rc.b_cbr )
+        h->param.rc.i_rf_constant = 0;
+    if( h->param.rc.i_rf_constant > 0 )
+        h->param.rc.i_qp_constant = h->param.rc.i_rf_constant;
+    h->param.rc.i_rf_constant = x264_clip3( h->param.rc.i_rf_constant, 0, 51 );
+    h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
      if( !h->param.rc.b_cbr && h->param.rc.i_qp_constant == 0 )
      {
          h->mb.b_lossless = 1;
@@ -394,6 +366,17 @@ static int x264_validate_parameters( x264_t *h )
          h->param.rc.f_ip_factor = 1;
          h->param.rc.f_pb_factor = 1;
          h->param.analyse.b_psnr = 0;
+        h->param.analyse.i_chroma_qp_offset = 0;
+        h->param.analyse.i_trellis = 0;
+        h->param.analyse.b_fast_pskip = 0;
+        h->param.analyse.i_noise_reduction = 0;
+    }
+
+    if( ( h->param.i_width % 16 || h->param.i_height % 16 ) && !h->mb.b_lossless )
+    {
+        x264_log( h, X264_LOG_WARNING, 
+                  "width or height not divisible by 16 (%dx%d), compression will suffer.\n",
+                  h->param.i_width, h->param.i_height );
      }
  
      h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, 16 );
@@ -404,14 +387,12 @@ static int x264_validate_parameters( x264_t *h )
      h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_BFRAME_MAX );
      h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
      h->param.b_bframe_pyramid = h->param.b_bframe_pyramid && h->param.i_bframe > 1;
+    h->param.b_bframe_adaptive = h->param.b_bframe_adaptive && h->param.i_bframe > 0;
  
      h->param.i_deblocking_filter_alphac0 = x264_clip3( h->param.i_deblocking_filter_alphac0, -6, 6 );
      h->param.i_deblocking_filter_beta    = x264_clip3( h->param.i_deblocking_filter_beta, -6, 6 );
  
-    h->param.i_cabac_init_idc = x264_clip3( h->param.i_cabac_init_idc, -1, 2 );
-    /* don't yet support merging of cabac stats */
-    if( h->param.i_threads > 1 && h->param.i_cabac_init_idc == -1 )
-        h->param.i_cabac_init_idc = 0;
+    h->param.i_cabac_init_idc = x264_clip3( h->param.i_cabac_init_idc, 0, 2 );
  
      if( h->param.i_cqm_preset < X264_CQM_FLAT || h->param.i_cqm_preset > X264_CQM_CUSTOM )
          h->param.i_cqm_preset = X264_CQM_FLAT;
@@ -424,6 +405,11 @@ static int x264_validate_parameters( x264_t *h )
      if( h->param.analyse.i_me_range > 16 && h->param.analyse.i_me_method <= X264_ME_HEX )
          h->param.analyse.i_me_range = 16;
      h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 6 );
+    h->param.analyse.b_bframe_rdo = h->param.analyse.b_bframe_rdo && h->param.analyse.i_subpel_refine >= 6;
+    h->param.analyse.b_mixed_references = h->param.analyse.b_mixed_references && h->param.i_frame_reference > 1;
+    h->param.analyse.inter &= X264_ANALYSE_PSUB16x16|X264_ANALYSE_PSUB8x8|X264_ANALYSE_BSUB16x16|
+                              X264_ANALYSE_I4x4|X264_ANALYSE_I8x8;
+    h->param.analyse.intra &= X264_ANALYSE_I4x4|X264_ANALYSE_I8x8;
      if( !(h->param.analyse.inter & X264_ANALYSE_PSUB16x16) )
          h->param.analyse.inter &= ~X264_ANALYSE_PSUB8x8;
      if( !h->param.analyse.b_transform_8x8 )
@@ -432,13 +418,44 @@ static int x264_validate_parameters( x264_t *h )
          h->param.analyse.intra &= ~X264_ANALYSE_I8x8;
      }
      h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
-    h->param.analyse.i_mv_range = x264_clip3(h->param.analyse.i_mv_range, 32, 2048);
+    if( !h->param.b_cabac )
+        h->param.analyse.i_trellis = 0;
+    h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
+    h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
+
+    {
+        const x264_level_t *l = x264_levels;
+        while( l->level_idc != 0 && l->level_idc != h->param.i_level_idc )
+            l++;
+        if( l->level_idc == 0 )
+        {
+            x264_log( h, X264_LOG_ERROR, "invalid level_idc: %d\n", h->param.i_level_idc );
+            return -1;
+        }
+        if( h->param.analyse.i_mv_range <= 0 )
+            h->param.analyse.i_mv_range = l->mv_range;
+        else
+            h->param.analyse.i_mv_range = x264_clip3(h->param.analyse.i_mv_range, 32, 2048);
+    }
  
      if( h->param.rc.f_qblur < 0 )
          h->param.rc.f_qblur = 0;
      if( h->param.rc.f_complexity_blur < 0 )
          h->param.rc.f_complexity_blur = 0;
-    h->param.rc.i_qp_constant = x264_clip3(h->param.rc.i_qp_constant, 0, 51);
+
+    /* ensure the booleans are 0 or 1 so they can be used in math */
+#define BOOLIFY(x) h->param.x = !!h->param.x
+    BOOLIFY( b_cabac );
+    BOOLIFY( b_deblocking_filter );
+    BOOLIFY( analyse.b_transform_8x8 );
+    BOOLIFY( analyse.b_weighted_bipred );
+    BOOLIFY( analyse.b_bidir_me );
+    BOOLIFY( analyse.b_chroma_me );
+    BOOLIFY( analyse.b_fast_pskip );
+    BOOLIFY( rc.b_cbr );
+    BOOLIFY( rc.b_stat_write );
+    BOOLIFY( rc.b_stat_read );
+#undef BOOLIFY
  
      return 0;
  }
@@ -535,6 +552,8 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
      h->pps = &h->pps_array[0];
      x264_pps_init( h->pps, 0, &h->param, h->sps);
  
+    x264_validate_levels( h );
+
      x264_cqm_init( h );
      
      h->mb.i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height;
@@ -544,6 +563,8 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
      h->frames.i_max_ref0 = h->param.i_frame_reference;
      h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames;
      h->frames.i_max_dpb  = h->sps->vui.i_max_dec_frame_buffering + 1;
+    h->frames.b_have_lowres = !h->param.rc.b_stat_read
+        && ( h->param.rc.b_cbr || h->param.rc.i_rf_constant || h->param.b_bframe_adaptive );
  
      for( i = 0; i < X264_BFRAME_MAX + 3; i++ )
      {
@@ -569,11 +590,8 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
  
      h->fdec = h->frames.reference[0];
  
-    /* init mb cache */
      x264_macroblock_cache_init( h );
-
-    /* init cabac adaptive model */
-    x264_cabac_model_init( &h->cabac );
+    x264_rdo_init( );
  
      /* init CPU functions */
      x264_predict_16x16_init( h->param.cpu, h->predict_16x16 );
@@ -586,6 +604,7 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
      x264_mc_init( h->param.cpu, &h->mc );
      x264_csp_init( h->param.cpu, h->param.i_csp, &h->csp );
      x264_quant_init( h, h->param.cpu, &h->quantf );
+    x264_deblock_init( h->param.cpu, &h->loopf );
  
      memcpy( h->pixf.mbcmp,
              ( h->mb.b_lossless || h->param.analyse.i_subpel_refine <= 1 ) ? h->pixf.sad : h->pixf.satd,
@@ -621,11 +640,9 @@ int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
      h->param.analyse.i_me_method = param->analyse.i_me_method;
      h->param.analyse.i_me_range = param->analyse.i_me_range;
      h->param.analyse.i_subpel_refine = param->analyse.i_subpel_refine;
+    h->param.analyse.i_trellis = param->analyse.i_trellis;
      h->param.analyse.intra = param->analyse.intra;
      h->param.analyse.inter = param->analyse.inter;
-    if( h->sps->b_direct8x8_inference && h->param.i_bframe
-        && h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_TEMPORAL )
-        h->param.analyse.inter &= ~X264_ANALYSE_PSUB8x8;
  
      memcpy( h->pixf.mbcmp,
              ( h->mb.b_lossless || h->param.analyse.i_subpel_refine <= 1 ) ? h->pixf.sad : h->pixf.satd,
@@ -672,7 +689,7 @@ int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
      {
          /* identify ourself */
          x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
-        x264_sei_version_write( &h->out.bs );
+        x264_sei_version_write( h, &h->out.bs );
          x264_nal_end( h );
  
          /* generate sequence parameters */
@@ -731,9 +748,7 @@ static void x264_frame_sort( x264_frame_t *list[X264_BFRAME_MAX+1], int b_dts )
                               : dtime > 0;
              if( swap )
              {
-                x264_frame_t *tmp = list[i+1];
-                list[i+1] = list[i];
-                list[i] = tmp;
+                XCHG( x264_frame_t*, list[i], list[i+1] );
                  b_ok = 0;
              }
          }
@@ -773,10 +788,7 @@ static inline void x264_reference_build_list( x264_t *h, int i_poc, int i_slice_
          {
              if( h->fref0[i]->i_poc < h->fref0[i+1]->i_poc )
              {
-                x264_frame_t *tmp = h->fref0[i+1];
-
-                h->fref0[i+1] = h->fref0[i];
-                h->fref0[i] = tmp;
+                XCHG( x264_frame_t*, h->fref0[i], h->fref0[i+1] );
                  b_ok = 0;
                  break;
              }
@@ -790,10 +802,7 @@ static inline void x264_reference_build_list( x264_t *h, int i_poc, int i_slice_
          {
              if( h->fref1[i]->i_poc > h->fref1[i+1]->i_poc )
              {
-                x264_frame_t *tmp = h->fref1[i+1];
-
-                h->fref1[i+1] = h->fref1[i];
-                h->fref1[i] = tmp;
+                XCHG( x264_frame_t*, h->fref1[i], h->fref1[i+1] );
                  b_ok = 0;
                  break;
              }
@@ -841,11 +850,7 @@ static inline void x264_reference_update( x264_t *h )
  
      /* move lowres copy of the image to the ref frame */
      for( i = 0; i < 4; i++)
-    {
-        uint8_t *tmp = h->fdec->lowres[i];
-        h->fdec->lowres[i] = h->fenc->lowres[i];
-        h->fenc->lowres[i] = tmp;
-    }
+        XCHG( uint8_t*, h->fdec->lowres[i], h->fenc->lowres[i] );
  
      /* adaptive B decision needs a pointer, since it can't use the ref lists */
      if( h->sh.i_type != SLICE_TYPE_B )
@@ -908,15 +913,6 @@ static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_slice_type,
          /* Nothing to do ? */
      }
  
-    /* get adapative cabac model if needed */
-    if( h->param.b_cabac )
-    {
-        if( h->param.i_cabac_init_idc == -1 )
-        {
-            h->sh.i_cabac_init_idc = x264_cabac_model_get( &h->cabac, i_slice_type );
-        }
-    }
-
      x264_macroblock_slice_init( h );
  }
  
@@ -1049,16 +1045,8 @@ static int x264_slice_write( x264_t *h )
  
      if( h->param.b_cabac )
      {
-        int i_cabac_word;
          x264_cabac_encode_flush( &h->cabac );
-        /* TODO cabac stuffing things (p209) */
-        i_cabac_word = (((3 * h->cabac.i_sym_cnt - 3 * 96 * h->sps->i_mb_width * h->sps->i_mb_height)/32) - bs_pos( &h->out.bs)/8)/3;
  
-        while( i_cabac_word > 0 )
-        {
-            bs_write( &h->out.bs, 16, 0x0000 );
-            i_cabac_word--;
-        }
      }
      else
      {
@@ -1197,11 +1185,15 @@ int     x264_encoder_encode( x264_t *h,
  
          x264_frame_copy_picture( h, fenc, pic_in );
  
+        if( h->param.i_width % 16 || h->param.i_height % 16 )
+            x264_frame_expand_border_mod16( h, fenc );
+
          fenc->i_frame = h->frames.i_input++;
  
          x264_frame_put( h->frames.next, fenc );
  
-        x264_frame_init_lowres( h->param.cpu, fenc );
+        if( h->frames.b_have_lowres )
+            x264_frame_init_lowres( h->param.cpu, fenc );
  
          if( h->frames.i_input <= h->frames.i_delay )
          {
@@ -1349,13 +1341,13 @@ do_encode:
      h->i_nal_ref_idc = i_nal_ref_idc;
  
      /* Write SPS and PPS */
-    if( i_nal_type == NAL_SLICE_IDR )
+    if( i_nal_type == NAL_SLICE_IDR && h->param.b_repeat_headers )
      {
          if( h->fenc->i_frame == 0 )
          {
              /* identify ourself */
              x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
-            x264_sei_version_write( &h->out.bs );
+            x264_sei_version_write( h, &h->out.bs );
              x264_nal_end( h );
          }
  
@@ -1495,11 +1487,6 @@ do_encode:
      }
  
      /* ---------------------- Update encoder state ------------------------- */
-    /* update cabac */
-    if( h->param.b_cabac && h->param.i_cabac_init_idc == -1 )
-    {
-        x264_cabac_model_update( &h->cabac, i_slice_type, h->sh.i_qp );
-    }
  
      /* handle references */
      if( i_nal_ref_idc != NAL_PRIORITY_DISPOSABLE )
@@ -1519,6 +1506,8 @@ do_encode:
  
      x264_frame_put( h->frames.unused, h->fenc );
  
+    x264_noise_reduction_update( h );
+
      TIMER_STOP( i_mtime_encode_frame );
  
      /* ---------------------- Compute/Print statistics --------------------- */
@@ -1544,9 +1533,10 @@ do_encode:
          int64_t i_sqe_y, i_sqe_u, i_sqe_v;
  
          /* PSNR */
-        i_sqe_y = x264_sqe( h, frame_psnr->plane[0], frame_psnr->i_stride[0], h->fenc->plane[0], h->fenc->i_stride[0], h->param.i_width, h->param.i_height );
-        i_sqe_u = x264_sqe( h, frame_psnr->plane[1], frame_psnr->i_stride[1], h->fenc->plane[1], h->fenc->i_stride[1], h->param.i_width/2, h->param.i_height/2);
-        i_sqe_v = x264_sqe( h, frame_psnr->plane[2], frame_psnr->i_stride[2], h->fenc->plane[2], h->fenc->i_stride[2], h->param.i_width/2, h->param.i_height/2);
+        i_sqe_y = x264_pixel_ssd_wxh( &h->pixf, frame_psnr->plane[0], frame_psnr->i_stride[0], h->fenc->plane[0], h->fenc->i_stride[0], h->param.i_width, h->param.i_height );
+        i_sqe_u = x264_pixel_ssd_wxh( &h->pixf, frame_psnr->plane[1], frame_psnr->i_stride[1], h->fenc->plane[1], h->fenc->i_stride[1], h->param.i_width/2, h->param.i_height/2);
+        i_sqe_v = x264_pixel_ssd_wxh( &h->pixf, frame_psnr->plane[2], frame_psnr->i_stride[2], h->fenc->plane[2], h->fenc->i_stride[2], h->param.i_width/2, h->param.i_height/2);
+        x264_cpu_restore( h->param.cpu );
  
          h->stat.i_sqe_global[i_slice_type] += i_sqe_y + i_sqe_u + i_sqe_v;
          h->stat.f_psnr_average[i_slice_type] += x264_psnr( i_sqe_y + i_sqe_u + i_sqe_v, 3 * h->param.i_width * h->param.i_height / 2 );
@@ -1600,16 +1590,6 @@ do_encode:
  #ifdef DEBUG_DUMP_FRAME
      /* Dump reconstructed frame */
      x264_frame_dump( h, frame_psnr, "fdec.yuv" );
-#endif
-#if 0
-    if( h->i_ref0 > 0 )
-    {
-        x264_frame_dump( h, h->fref0[0], "ref0.yuv" );
-    }
-    if( h->i_ref1 > 0 )
-    {
-        x264_frame_dump( h, h->fref1[0], "ref1.yuv" );
-    }
  #endif
      return 0;
  }
@@ -1713,6 +1693,8 @@ void    x264_encoder_close  ( x264_t *h )
                    i_mb_count[B_SKIP]   / i_count );
      }
  
+    x264_ratecontrol_summary( h );
+
      if( h->stat.i_slice_count[SLICE_TYPE_I] + h->stat.i_slice_count[SLICE_TYPE_P] + h->stat.i_slice_count[SLICE_TYPE_B] > 0 )
      {
          const int i_count = h->stat.i_slice_count[SLICE_TYPE_I] +