]> git.sesse.net Git - x264/blobdiff - common/frame.c
CAVLC optimizations
[x264] / common / frame.c
index 43da631b34218577cd2ad47b1c3f6014d4dc0dfa..bd7ad45c2c074d5eff12176ea20baa140263ca06 100644 (file)
@@ -5,6 +5,7 @@
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
+ *          Fiona Glaser <fiona@x264.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -34,6 +35,7 @@ x264_frame_t *x264_frame_new( x264_t *h )
     int i_stride, i_width, i_lines;
     int i_padv = PADV << h->param.b_interlaced;
     int luma_plane_size;
+    int chroma_plane_size;
     int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
 
     if( !frame ) return NULL;
@@ -48,23 +50,32 @@ x264_frame_t *x264_frame_new( x264_t *h )
     frame->i_plane = 3;
     for( i = 0; i < 3; i++ )
     {
-        frame->i_stride[i] = i_stride >> !!i;
+        frame->i_stride[i] = ALIGN( i_stride >> !!i, 16 );
         frame->i_width[i] = i_width >> !!i;
         frame->i_lines[i] = i_lines >> !!i;
     }
 
     luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
+    chroma_plane_size = (frame->i_stride[1] * ( frame->i_lines[1] + 2*i_padv ));
     for( i = 1; i < 3; i++ )
     {
-        CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 );
+        CHECKED_MALLOC( frame->buffer[i], chroma_plane_size );
         frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
     }
     /* all 4 luma planes allocated together, since the cacheline split code
      * requires them to be in-phase wrt cacheline alignment. */
-    CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
-    for( i = 0; i < 4; i++ )
-        frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
-    frame->plane[0] = frame->filtered[0];
+    if( h->param.analyse.i_subpel_refine )
+    {
+        CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
+        for( i = 0; i < 4; i++ )
+            frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
+        frame->plane[0] = frame->filtered[0];
+    }
+    else
+    {
+        CHECKED_MALLOC( frame->buffer[0], luma_plane_size);
+        frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
+    }
 
     if( h->frames.b_have_lowres )
     {
@@ -90,7 +101,7 @@ x264_frame_t *x264_frame_new( x264_t *h )
     if( h->param.analyse.i_me_method >= X264_ME_ESA )
     {
         CHECKED_MALLOC( frame->buffer[3],
-                        2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
+                        frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
         frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
     }
 
@@ -124,7 +135,11 @@ x264_frame_t *x264_frame_new( x264_t *h )
             CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
 
     if( h->param.rc.i_aq_mode )
+    {
         CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
+        if( h->frames.b_have_lowres )
+            CHECKED_MALLOC( frame->i_inv_qscale_factor, h->mb.i_mb_count * sizeof(uint16_t) );
+    }
 
     x264_pthread_mutex_init( &frame->mutex, NULL );
     x264_pthread_cond_init( &frame->cv, NULL );
@@ -152,6 +167,9 @@ void x264_frame_delete( x264_frame_t *frame )
             x264_free( frame->lowres_mvs[j][i] );
             x264_free( frame->lowres_mv_costs[j][i] );
         }
+    x264_free( frame->f_qp_offset );
+    x264_free( frame->i_inv_qscale_factor );
+    x264_free( frame->i_intra_cost );
     x264_free( frame->i_row_bits );
     x264_free( frame->i_row_qp );
     x264_free( frame->mb_type );
@@ -607,6 +625,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
     const int b_interlaced = h->sh.b_mbaff;
     const int mvy_limit = 4 >> b_interlaced;
     const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
+    const int no_sub8x8 = !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
     int mb_x;
     int stridey   = h->fdec->i_stride[0];
     int stride2y  = stridey << b_interlaced;
@@ -692,24 +711,31 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
                     if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
                         h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
                         bS[i] = 2;\
-                    else\
+                    else if(!(i_edge&no_sub8x8))\
                     {\
-                        /* FIXME: A given frame may occupy more than one position in\
-                         * the reference list. So we should compare the frame numbers,\
-                         * not the indices in the ref list.\
-                         * No harm yet, as we don't generate that case.*/\
-                        int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
-                        int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
-                        int i4p= mb_4x4+x+y*s4x4;\
-                        int i4q= mbn_4x4+xn+yn*s4x4;\
-                        for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )\
-                            if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||\
-                                abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||\
-                                abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )\
+                        if((i&no_sub8x8) && bS[i-1] != 2)\
+                            bS[i] = bS[i-1];\
+                        else\
+                        {\
+                            /* FIXME: A given frame may occupy more than one position in\
+                             * the reference list. So we should compare the frame numbers,\
+                             * not the indices in the ref list.\
+                             * No harm yet, as we don't generate that case.*/\
+                            int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
+                            int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
+                            int i4p= mb_4x4+x+y*s4x4;\
+                            int i4q= mbn_4x4+xn+yn*s4x4;\
+                            if((h->mb.ref[0][i8p] != h->mb.ref[0][i8q] ||\
+                                abs( h->mb.mv[0][i4p][0] - h->mb.mv[0][i4q][0] ) >= 4 ||\
+                                abs( h->mb.mv[0][i4p][1] - h->mb.mv[0][i4q][1] ) >= mvy_limit ) ||\
+                               (h->sh.i_type == SLICE_TYPE_B &&\
+                               (h->mb.ref[1][i8p] != h->mb.ref[1][i8q] ||\
+                                abs( h->mb.mv[1][i4p][0] - h->mb.mv[1][i4q][0] ) >= 4 ||\
+                                abs( h->mb.mv[1][i4p][1] - h->mb.mv[1][i4q][1] ) >= mvy_limit )))\
                             {\
                                 bS[i] = 1;\
-                                break;\
                             }\
+                        }\
                     }\
                 }\
             }\
@@ -720,7 +746,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
         #define DEBLOCK_DIR(i_dir)\
         {\
             int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
-            int i_qpn, i, l, mbn_xy, mbn_8x8, mbn_4x4;\
+            int i_qpn, i, mbn_xy, mbn_8x8, mbn_4x4;\
             DECLARE_ALIGNED_4( uint8_t bS[4] );  /* filtering strength */\
             if( i_edge )\
                 i_edge+= b_8x8_transform;\