]> git.sesse.net Git - x264/blobdiff - encoder/encoder.c
MBAFF: Copy deblocked pixels to other plane
[x264] / encoder / encoder.c
index fc8add1a60d217dffcc4f321fd70dde25bf30257..1e9a46fa19176196a866ba749f6ad055afa785c2 100644 (file)
@@ -1035,6 +1035,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
     h->mb.i_mb_width = h->sps->i_mb_width;
     h->mb.i_mb_height = h->sps->i_mb_height;
     h->mb.i_mb_count = h->mb.i_mb_width * h->mb.i_mb_height;
+    /* Adaptive MBAFF and subme 0 are not supported as we require halving motion
+     * vectors during prediction, resulting in hpel mvs.
+     * The chosen solution is to make MBAFF non-adaptive in this case. */
+    h->mb.b_adaptive_mbaff = h->param.b_interlaced && h->param.analyse.i_subpel_refine;
 
     /* Init frames. */
     if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS && !h->param.rc.b_stat_read )
@@ -1156,8 +1160,8 @@ x264_t *x264_encoder_open( x264_param_t *param )
         * ( h->param.rc.i_rc_method == X264_RC_ABR ? pow( 0.95, h->param.rc.i_qp_min )
           : pow( 0.95, h->param.rc.i_qp_constant ) * X264_MAX( 1, h->param.rc.f_ip_factor )));
 
-    CHECKED_MALLOC( h->nal_buffer, h->out.i_bitstream * 3/2 + 4 );
     h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;
+    CHECKED_MALLOC( h->nal_buffer, h->nal_buffer_size );
 
     if( h->param.i_threads > 1 &&
         x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
@@ -1402,9 +1406,11 @@ static int x264_encoder_encapsulate_nals( x264_t *h, int start )
         nal_size += h->out.nal[i].i_payload;
 
     /* Worst-case NAL unit escaping: reallocate the buffer if it's too small. */
-    if( h->nal_buffer_size < nal_size * 3/2 + h->out.i_nal * 4 )
+    int necessary_size = nal_size * 3/2 + h->out.i_nal * 4;
+    if( h->nal_buffer_size < necessary_size )
     {
-        uint8_t *buf = x264_malloc( nal_size * 2 + h->out.i_nal * 4 );
+        h->nal_buffer_size = necessary_size * 2;
+        uint8_t *buf = x264_malloc( h->nal_buffer_size );
         if( !buf )
             return -1;
         if( previous_nal_size )
@@ -1459,6 +1465,8 @@ int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
         return -1;
 
     frame_size = x264_encoder_encapsulate_nals( h, 0 );
+    if( frame_size < 0 )
+        return -1;
 
     /* now set output*/
     *pi_nal = h->out.i_nal;
@@ -1737,7 +1745,10 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
     int b_measure_quality = 1;
     int min_y = mb_y - (1 << h->sh.b_mbaff);
     int b_start = min_y == h->i_threadslice_start;
-    int max_y = b_end ? h->i_threadslice_end : mb_y;
+    /* Even in interlaced mode, deblocking never modifies more than 4 pixels
+     * above each MB, as bS=4 doesn't happen for the top of interlaced mbpairs. */
+    int minpix_y = min_y*16 - 4 * !b_start;
+    int maxpix_y = mb_y*16 - 4 * !b_end;
     b_deblock &= b_hpel || h->param.psz_dump_yuv;
     if( h->param.b_sliced_threads && b_start && min_y && !b_inloop )
     {
@@ -1750,9 +1761,19 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
         return;
 
     if( b_deblock )
-        for( int y = min_y; y < max_y; y += (1 << h->sh.b_mbaff) )
+        for( int y = min_y; y < mb_y; y += (1 << h->sh.b_mbaff) )
             x264_frame_deblock_row( h, y );
 
+    /* FIXME: Prediction requires different borders for interlaced/progressive mc,
+     * but the actual image data is equivalent. For now, maintain this
+     * consistency by copying deblocked pixels between planes. */
+    if( h->param.b_interlaced )
+        for( int p = 0; p < 2; p++ )
+            for( int i = minpix_y>>p; i < maxpix_y>>p; i++ )
+                memcpy( h->fdec->plane_fld[p] + i*h->fdec->i_stride[p],
+                        h->fdec->plane[p]     + i*h->fdec->i_stride[p],
+                        h->mb.i_mb_width*16*sizeof(pixel) );
+
     if( b_hpel )
     {
         int end = mb_y == h->mb.i_mb_height;
@@ -1764,25 +1785,30 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
         }
     }
 
+    if( h->sh.b_mbaff )
+        for( int i = 0; i < 2; i++ )
+        {
+            XCHG( pixel *, h->intra_border_backup[0][i], h->intra_border_backup[3][i] );
+            XCHG( pixel *, h->intra_border_backup[1][i], h->intra_border_backup[4][i] );
+        }
+
     if( h->i_thread_frames > 1 && h->fdec->b_kept_as_ref )
         x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << h->sh.b_mbaff)) );
 
-    min_y = min_y*16 - 8 * !b_start;
-    max_y = b_end ? X264_MIN( h->i_threadslice_end*16 , h->param.i_height ) : mb_y*16 - 8;
-
     if( b_measure_quality )
     {
+        maxpix_y = X264_MIN( maxpix_y, h->param.i_height );
         if( h->param.analyse.b_psnr )
         {
             uint64_t ssd_y = x264_pixel_ssd_wxh( &h->pixf,
-                h->fdec->plane[0] + min_y * h->fdec->i_stride[0], h->fdec->i_stride[0],
-                h->fenc->plane[0] + min_y * h->fenc->i_stride[0], h->fenc->i_stride[0],
-                h->param.i_width, max_y-min_y );
+                h->fdec->plane[0] + minpix_y * h->fdec->i_stride[0], h->fdec->i_stride[0],
+                h->fenc->plane[0] + minpix_y * h->fenc->i_stride[0], h->fenc->i_stride[0],
+                h->param.i_width, maxpix_y-minpix_y );
             uint64_t ssd_u, ssd_v;
             x264_pixel_ssd_nv12( &h->pixf,
-                h->fdec->plane[1] + (min_y>>1) * h->fdec->i_stride[1], h->fdec->i_stride[1],
-                h->fenc->plane[1] + (min_y>>1) * h->fenc->i_stride[1], h->fenc->i_stride[1],
-                h->param.i_width>>1, (max_y-min_y)>>1, &ssd_u, &ssd_v );
+                h->fdec->plane[1] + (minpix_y>>1) * h->fdec->i_stride[1], h->fdec->i_stride[1],
+                h->fenc->plane[1] + (minpix_y>>1) * h->fenc->i_stride[1], h->fenc->i_stride[1],
+                h->param.i_width>>1, (maxpix_y-minpix_y)>>1, &ssd_u, &ssd_v );
             h->stat.frame.i_ssd[0] += ssd_y;
             h->stat.frame.i_ssd[1] += ssd_u;
             h->stat.frame.i_ssd[2] += ssd_v;
@@ -1793,12 +1819,12 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
             x264_emms();
             /* offset by 2 pixels to avoid alignment of ssim blocks with dct blocks,
              * and overlap by 4 */
-            min_y += b_start ? 2 : -6;
+            minpix_y += b_start ? 2 : -6;
             h->stat.frame.f_ssim +=
                 x264_pixel_ssim_wxh( &h->pixf,
-                    h->fdec->plane[0] + 2+min_y*h->fdec->i_stride[0], h->fdec->i_stride[0],
-                    h->fenc->plane[0] + 2+min_y*h->fenc->i_stride[0], h->fenc->i_stride[0],
-                    h->param.i_width-2, max_y-min_y, h->scratch_buffer );
+                    h->fdec->plane[0] + 2+minpix_y*h->fdec->i_stride[0], h->fdec->i_stride[0],
+                    h->fenc->plane[0] + 2+minpix_y*h->fenc->i_stride[0], h->fenc->i_stride[0],
+                    h->param.i_width-2, maxpix_y-minpix_y, h->scratch_buffer );
         }
     }
 }
@@ -2025,6 +2051,17 @@ static int x264_slice_write( x264_t *h )
         if( i_mb_x == 0 && !h->mb.b_reencode_mb )
             x264_fdec_filter_row( h, i_mb_y, 1 );
 
+        if( h->param.b_interlaced )
+        {
+            if( h->mb.b_adaptive_mbaff )
+            {
+                if( !(i_mb_y&1) )
+                    h->mb.b_interlaced = 1;
+                x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
+            }
+            h->mb.field[mb_xy] = h->mb.b_interlaced;
+        }
+
         /* load cache */
         x264_macroblock_cache_load( h, i_mb_x, i_mb_y );
 
@@ -2906,6 +2943,8 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
     }
 
     int frame_size = x264_encoder_encapsulate_nals( h, 0 );
+    if( frame_size < 0 )
+        return -1;
 
     /* Set output picture properties */
     pic_out->i_type = h->fenc->i_type;
@@ -2959,6 +2998,8 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
         if( x264_nal_end( h ) )
             return -1;
         int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 );
+        if( total_size < 0 )
+            return -1;
         frame_size += total_size;
         filler -= total_size;
     }