MBAFF: Copy deblocked pixels to other plane

[x264] / encoder / encoder.c
diff --git a/encoder/encoder.c b/encoder/encoder.c

index 48dfc805179507949516a1bdfd30da472f02b278..1e9a46fa19176196a866ba749f6ad055afa785c2 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -532,10 +532,10 @@ static int x264_validate_parameters( x264_t *h, int b_open )
      h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, QP_MAX );
      h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
      h->param.rc.i_qp_step = x264_clip3( h->param.rc.i_qp_step, 0, QP_MAX );
-    h->param.rc.i_bitrate = X264_MAX( h->param.rc.i_bitrate, 0 );
-    h->param.rc.i_vbv_buffer_size = X264_MAX( h->param.rc.i_vbv_buffer_size, 0 );
-    h->param.rc.i_vbv_max_bitrate = X264_MAX( h->param.rc.i_vbv_max_bitrate, 0 );
-    h->param.rc.f_vbv_buffer_init = X264_MAX( h->param.rc.f_vbv_buffer_init, 0 );
+    h->param.rc.i_bitrate = x264_clip3( h->param.rc.i_bitrate, 0, 2000000 );
+    h->param.rc.i_vbv_buffer_size = x264_clip3( h->param.rc.i_vbv_buffer_size, 0, 2000000 );
+    h->param.rc.i_vbv_max_bitrate = x264_clip3( h->param.rc.i_vbv_max_bitrate, 0, 2000000 );
+    h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init, 0, 2000000 );
      if( h->param.rc.i_vbv_buffer_size )
      {
          if( h->param.rc.i_rc_method == X264_RC_CQP )
@@ -701,8 +701,7 @@ static int x264_validate_parameters( x264_t *h, int b_open )
      if( h->param.analyse.i_me_method < X264_ME_DIA ||
          h->param.analyse.i_me_method > X264_ME_TESA )
          h->param.analyse.i_me_method = X264_ME_HEX;
-    if( h->param.analyse.i_me_range < 4 )
-        h->param.analyse.i_me_range = 4;
+    h->param.analyse.i_me_range = x264_clip3( h->param.analyse.i_me_range, 4, 1024 );
      if( h->param.analyse.i_me_range > 16 && h->param.analyse.i_me_method <= X264_ME_HEX )
          h->param.analyse.i_me_range = 16;
      if( h->param.analyse.i_me_method == X264_ME_TESA &&
@@ -1036,6 +1035,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
      h->mb.i_mb_width = h->sps->i_mb_width;
      h->mb.i_mb_height = h->sps->i_mb_height;
      h->mb.i_mb_count = h->mb.i_mb_width * h->mb.i_mb_height;
+    /* Adaptive MBAFF and subme 0 are not supported as we require halving motion
+     * vectors during prediction, resulting in hpel mvs.
+     * The chosen solution is to make MBAFF non-adaptive in this case. */
+    h->mb.b_adaptive_mbaff = h->param.b_interlaced && h->param.analyse.i_subpel_refine;
  
      /* Init frames. */
      if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS && !h->param.rc.b_stat_read )
@@ -1157,8 +1160,8 @@ x264_t *x264_encoder_open( x264_param_t *param )
          * ( h->param.rc.i_rc_method == X264_RC_ABR ? pow( 0.95, h->param.rc.i_qp_min )
            : pow( 0.95, h->param.rc.i_qp_constant ) * X264_MAX( 1, h->param.rc.f_ip_factor )));
  
-    CHECKED_MALLOC( h->nal_buffer, h->out.i_bitstream * 3/2 + 4 );
      h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;
+    CHECKED_MALLOC( h->nal_buffer, h->nal_buffer_size );
  
      if( h->param.i_threads > 1 &&
          x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
@@ -1309,22 +1312,17 @@ int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
      if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 &&
            param->rc.i_vbv_max_bitrate > 0 &&   param->rc.i_vbv_buffer_size > 0 )
      {
+        rc_reconfig |= h->param.rc.i_vbv_max_bitrate != param->rc.i_vbv_max_bitrate;
+        rc_reconfig |= h->param.rc.i_vbv_buffer_size != param->rc.i_vbv_buffer_size;
+        rc_reconfig |= h->param.rc.i_bitrate != param->rc.i_bitrate;
          COPY( rc.i_vbv_max_bitrate );
          COPY( rc.i_vbv_buffer_size );
          COPY( rc.i_bitrate );
-        rc_reconfig = 1;
      }
-    if( h->param.rc.f_rf_constant != param->rc.f_rf_constant )
-    {
-        COPY( rc.f_rf_constant );
-        rc_reconfig = 1;
-    }
-    if( h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max )
-    {
-        COPY( rc.f_rf_constant_max );
-        rc_reconfig = 1;
-    }
-
+    rc_reconfig |= h->param.rc.f_rf_constant != param->rc.f_rf_constant;
+    rc_reconfig |= h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max;
+    COPY( rc.f_rf_constant );
+    COPY( rc.f_rf_constant_max );
  #undef COPY
  
      mbcmp_init( h );
@@ -1408,9 +1406,11 @@ static int x264_encoder_encapsulate_nals( x264_t *h, int start )
          nal_size += h->out.nal[i].i_payload;
  
      /* Worst-case NAL unit escaping: reallocate the buffer if it's too small. */
-    if( h->nal_buffer_size < nal_size * 3/2 + h->out.i_nal * 4 )
+    int necessary_size = nal_size * 3/2 + h->out.i_nal * 4;
+    if( h->nal_buffer_size < necessary_size )
      {
-        uint8_t *buf = x264_malloc( nal_size * 2 + h->out.i_nal * 4 );
+        h->nal_buffer_size = necessary_size * 2;
+        uint8_t *buf = x264_malloc( h->nal_buffer_size );
          if( !buf )
              return -1;
          if( previous_nal_size )
@@ -1465,6 +1465,8 @@ int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
          return -1;
  
      frame_size = x264_encoder_encapsulate_nals( h, 0 );
+    if( frame_size < 0 )
+        return -1;
  
      /* now set output*/
      *pi_nal = h->out.i_nal;
@@ -1743,7 +1745,10 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
      int b_measure_quality = 1;
      int min_y = mb_y - (1 << h->sh.b_mbaff);
      int b_start = min_y == h->i_threadslice_start;
-    int max_y = b_end ? h->i_threadslice_end : mb_y;
+    /* Even in interlaced mode, deblocking never modifies more than 4 pixels
+     * above each MB, as bS=4 doesn't happen for the top of interlaced mbpairs. */
+    int minpix_y = min_y*16 - 4 * !b_start;
+    int maxpix_y = mb_y*16 - 4 * !b_end;
      b_deblock &= b_hpel || h->param.psz_dump_yuv;
      if( h->param.b_sliced_threads && b_start && min_y && !b_inloop )
      {
@@ -1756,9 +1761,19 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
          return;
  
      if( b_deblock )
-        for( int y = min_y; y < max_y; y += (1 << h->sh.b_mbaff) )
+        for( int y = min_y; y < mb_y; y += (1 << h->sh.b_mbaff) )
              x264_frame_deblock_row( h, y );
  
+    /* FIXME: Prediction requires different borders for interlaced/progressive mc,
+     * but the actual image data is equivalent. For now, maintain this
+     * consistency by copying deblocked pixels between planes. */
+    if( h->param.b_interlaced )
+        for( int p = 0; p < 2; p++ )
+            for( int i = minpix_y>>p; i < maxpix_y>>p; i++ )
+                memcpy( h->fdec->plane_fld[p] + i*h->fdec->i_stride[p],
+                        h->fdec->plane[p]     + i*h->fdec->i_stride[p],
+                        h->mb.i_mb_width*16*sizeof(pixel) );
+
      if( b_hpel )
      {
          int end = mb_y == h->mb.i_mb_height;
@@ -1770,25 +1785,30 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
          }
      }
  
+    if( h->sh.b_mbaff )
+        for( int i = 0; i < 2; i++ )
+        {
+            XCHG( pixel *, h->intra_border_backup[0][i], h->intra_border_backup[3][i] );
+            XCHG( pixel *, h->intra_border_backup[1][i], h->intra_border_backup[4][i] );
+        }
+
      if( h->i_thread_frames > 1 && h->fdec->b_kept_as_ref )
          x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << h->sh.b_mbaff)) );
  
-    min_y = min_y*16 - 8 * !b_start;
-    max_y = b_end ? X264_MIN( h->i_threadslice_end*16 , h->param.i_height ) : mb_y*16 - 8;
-
      if( b_measure_quality )
      {
+        maxpix_y = X264_MIN( maxpix_y, h->param.i_height );
          if( h->param.analyse.b_psnr )
          {
              uint64_t ssd_y = x264_pixel_ssd_wxh( &h->pixf,
-                h->fdec->plane[0] + min_y * h->fdec->i_stride[0], h->fdec->i_stride[0],
-                h->fenc->plane[0] + min_y * h->fenc->i_stride[0], h->fenc->i_stride[0],
-                h->param.i_width, max_y-min_y );
+                h->fdec->plane[0] + minpix_y * h->fdec->i_stride[0], h->fdec->i_stride[0],
+                h->fenc->plane[0] + minpix_y * h->fenc->i_stride[0], h->fenc->i_stride[0],
+                h->param.i_width, maxpix_y-minpix_y );
              uint64_t ssd_u, ssd_v;
              x264_pixel_ssd_nv12( &h->pixf,
-                h->fdec->plane[1] + (min_y>>1) * h->fdec->i_stride[1], h->fdec->i_stride[1],
-                h->fenc->plane[1] + (min_y>>1) * h->fenc->i_stride[1], h->fenc->i_stride[1],
-                h->param.i_width>>1, (max_y-min_y)>>1, &ssd_u, &ssd_v );
+                h->fdec->plane[1] + (minpix_y>>1) * h->fdec->i_stride[1], h->fdec->i_stride[1],
+                h->fenc->plane[1] + (minpix_y>>1) * h->fenc->i_stride[1], h->fenc->i_stride[1],
+                h->param.i_width>>1, (maxpix_y-minpix_y)>>1, &ssd_u, &ssd_v );
              h->stat.frame.i_ssd[0] += ssd_y;
              h->stat.frame.i_ssd[1] += ssd_u;
              h->stat.frame.i_ssd[2] += ssd_v;
@@ -1799,12 +1819,12 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
              x264_emms();
              /* offset by 2 pixels to avoid alignment of ssim blocks with dct blocks,
               * and overlap by 4 */
-            min_y += b_start ? 2 : -6;
+            minpix_y += b_start ? 2 : -6;
              h->stat.frame.f_ssim +=
                  x264_pixel_ssim_wxh( &h->pixf,
-                    h->fdec->plane[0] + 2+min_y*h->fdec->i_stride[0], h->fdec->i_stride[0],
-                    h->fenc->plane[0] + 2+min_y*h->fenc->i_stride[0], h->fenc->i_stride[0],
-                    h->param.i_width-2, max_y-min_y, h->scratch_buffer );
+                    h->fdec->plane[0] + 2+minpix_y*h->fdec->i_stride[0], h->fdec->i_stride[0],
+                    h->fenc->plane[0] + 2+minpix_y*h->fenc->i_stride[0], h->fenc->i_stride[0],
+                    h->param.i_width-2, maxpix_y-minpix_y, h->scratch_buffer );
          }
      }
  }
@@ -2031,6 +2051,17 @@ static int x264_slice_write( x264_t *h )
          if( i_mb_x == 0 && !h->mb.b_reencode_mb )
              x264_fdec_filter_row( h, i_mb_y, 1 );
  
+        if( h->param.b_interlaced )
+        {
+            if( h->mb.b_adaptive_mbaff )
+            {
+                if( !(i_mb_y&1) )
+                    h->mb.b_interlaced = 1;
+                x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
+            }
+            h->mb.field[mb_xy] = h->mb.b_interlaced;
+        }
+
          /* load cache */
          x264_macroblock_cache_load( h, i_mb_x, i_mb_y );
  
@@ -2265,6 +2296,7 @@ static void x264_thread_sync_context( x264_t *dst, x264_t *src )
      memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.type) - offsetof(x264_t, i_frame) );
      dst->param = src->param;
      dst->stat = src->stat;
+    dst->pixf = src->pixf;
  }
  
  static void x264_thread_sync_stat( x264_t *dst, x264_t *src )
@@ -2911,6 +2943,8 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
      }
  
      int frame_size = x264_encoder_encapsulate_nals( h, 0 );
+    if( frame_size < 0 )
+        return -1;
  
      /* Set output picture properties */
      pic_out->i_type = h->fenc->i_type;
@@ -2964,6 +2998,8 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
          if( x264_nal_end( h ) )
              return -1;
          int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 );
+        if( total_size < 0 )
+            return -1;
          frame_size += total_size;
          filler -= total_size;
      }