git.sesse.net Git - x264/commitdiff
Improve DTS generation, move DTS compression into libx264
author Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
Wed, 27 Jan 2010 00:01:54 +0000 (16:01 -0800)
committer Fiona Glaser <fiona@x264.com>
Sat, 30 Jan 2010 12:05:10 +0000 (04:05 -0800)
This change fixes some cases in which PTS could be less than DTS.

Additionally, a new parameter, b_dts_compress, enables DTS compression.
DTS compression eliminates negative DTS (i.e. initial delay) due to B-frames.
The algorithm changes timebase in order to avoid duplicating DTS.
Currently, in x264cli, only the FLV muxer uses it.  The MP4 muxer doesn't need it, as it uses an EditBox instead.

common/common.c
common/common.h
common/frame.c
common/frame.h
encoder/encoder.c
encoder/slicetype.c
output/flv.c
output/mp4.c
x264.c
x264.h

index 9eed5c37e955294471105f79ff66d2680bd615b2..b454e37e31f5f0d753ce9ece49e1764c221903a7 100644 (file)
@@ -157,6 +157,7 @@ void    x264_param_default( x264_param_t *param )
     param->b_annexb = 1;
     param->b_aud = 0;
     param->b_vfr_input = 1;
+    param->b_dts_compress = 0;
 }
 
 static int parse_enum( const char *arg, const char * const *names, int *dst )
index 0f16e0a041fe83b8648c9703533d932a390fedac..ca1533048aab8299063eeb8c55a563c46fdfb189 100644 (file)
@@ -376,6 +376,9 @@ struct x264_t
     x264_pps_t      *pps;
     int             i_idr_pic_id;
 
+    /* Timebase multiplier for DTS compression */
+    int             i_dts_compress_multiplier;
+
     /* quantization matrix for decoding, [cqm][qp%6][coef] */
     int             (*dequant4_mf[4])[16];   /* [4][6][16] */
     int             (*dequant8_mf[2])[64];   /* [2][6][64] */
@@ -429,6 +432,8 @@ struct x264_t
         int i_delay;    /* Number of frames buffered for B reordering */
         int     i_bframe_delay;
         int64_t i_bframe_delay_time;
+        int64_t i_init_delta;
+        int64_t i_prev_dts[2];
         int b_have_lowres;  /* Whether 1/2 resolution luma planes are being used */
         int b_have_sub8x8_esa;
     } frames;
index e01d77936a69bf952c1337f0b54836c1876ca379..e7003fdf07c4359f73f5b24488b608fc95e4dcdb 100644 (file)
@@ -223,7 +223,7 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
 
     dst->i_type     = src->i_type;
     dst->i_qpplus1  = src->i_qpplus1;
-    dst->i_pts      = dst->i_dts = src->i_pts;
+    dst->i_pts      = dst->i_reordered_pts = src->i_pts;
     dst->param      = src->param;
 
     for( i=0; i<3; i++ )
index 786869e5660edd6dcc3904bc16dbe5ca061f392c..b1852b339a8bd9d32e8241862675623de510f5e0 100644 (file)
@@ -35,7 +35,7 @@ typedef struct x264_frame
     int     i_type;
     int     i_qpplus1;
     int64_t i_pts;
-    int64_t i_dts;
+    int64_t i_reordered_pts;
     x264_param_t *param;
 
     int     i_frame;     /* Presentation frame number */
index d69cf520f0ad14092f48b4441105b6537b513c1c..8d3bc34406febb87aa70e8b03c182c52305a2e8e 100644 (file)
@@ -863,6 +863,18 @@ x264_t *x264_encoder_open( x264_param_t *param )
     h->i_frame = -1;
     h->i_frame_num = 0;
     h->i_idr_pic_id = 0;
+    if( h->param.b_dts_compress )
+    {
+        /* h->i_dts_compress_multiplier == h->frames.i_bframe_delay + 1 */
+        h->i_dts_compress_multiplier = h->param.i_bframe ? (h->param.i_bframe_pyramid ? 3 : 2) : 1;
+        if( h->i_dts_compress_multiplier != 1 )
+            x264_log( h, X264_LOG_DEBUG, "DTS compresion changed timebase: %d/%d -> %d/%d\n",
+                      h->param.i_timebase_num, h->param.i_timebase_den,
+                      h->param.i_timebase_num, h->param.i_timebase_den * h->i_dts_compress_multiplier );
+        h->param.i_timebase_den *= h->i_dts_compress_multiplier;
+    }
+    else
+        h->i_dts_compress_multiplier = 1;
 
     h->sps = &h->sps_array[0];
     x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
@@ -2388,8 +2400,31 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
         pic_out->i_type = X264_TYPE_B;
 
     pic_out->b_keyframe = h->fenc->b_keyframe;
-    pic_out->i_pts = h->fenc->i_pts;
-    pic_out->i_dts = h->fenc->i_dts - h->frames.i_bframe_delay_time;
+
+    pic_out->i_pts = h->fenc->i_pts *= h->i_dts_compress_multiplier;
+    if( h->frames.i_bframe_delay )
+    {
+        int64_t *i_prev_dts = thread_current->frames.i_prev_dts;
+        if( h->i_frame <= h->frames.i_bframe_delay )
+        {
+            if( h->i_dts_compress_multiplier == 1 )
+                pic_out->i_dts = h->fenc->i_reordered_pts - h->frames.i_bframe_delay_time;
+            else
+            {
+                /* DTS compression */
+                if( h->i_frame == 1 )
+                    thread_current->frames.i_init_delta = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
+                pic_out->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier;
+            }
+        }
+        else
+            pic_out->i_dts = i_prev_dts[ (h->i_frame - h->frames.i_bframe_delay) % h->frames.i_bframe_delay ];
+        i_prev_dts[ h->i_frame % h->frames.i_bframe_delay ] = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
+    }
+    else
+        pic_out->i_dts = h->fenc->i_reordered_pts;
+    assert( pic_out->i_pts >= pic_out->i_dts );
+
     pic_out->img.i_plane = h->fdec->i_plane;
     for(i = 0; i < 3; i++)
     {
index 18309e4a63f5cf139add074500d3b860ea33a9b0..c46aee5ead5030a6af99bb029402eca87d698ce5 100644 (file)
@@ -1336,10 +1336,10 @@ void x264_slicetype_decide( x264_t *h )
         {
             int idx = index[h->lookahead->next.list[i]->i_type == X264_TYPE_BREF]++;
             frames[idx] = h->lookahead->next.list[i];
-            frames[idx]->i_dts = h->lookahead->next.list[idx]->i_pts;
+            frames[idx]->i_reordered_pts = h->lookahead->next.list[idx]->i_pts;
         }
         frames[0] = h->lookahead->next.list[bframes];
-        frames[0]->i_dts = h->lookahead->next.list[0]->i_pts;
+        frames[0]->i_reordered_pts = h->lookahead->next.list[0]->i_pts;
         memcpy( h->lookahead->next.list, frames, (bframes+1) * sizeof(x264_frame_t*) );
     }
     for( i = 0; i <= bframes; i++ )
index 8a937cf1018aaf6efac9de37375cf8b058f9b10b..d73466b5ddb764cd1dca420b5561ca044e9c05ac 100644 (file)
@@ -37,8 +37,6 @@ typedef struct
     int64_t i_fps_num;
     int64_t i_fps_den;
     int64_t i_framenum;
-    int     i_init_delay;
-    int     i_delay_time;
 
     uint64_t i_framerate_pos;
     uint64_t i_duration_pos;
@@ -46,8 +44,8 @@ typedef struct
     uint64_t i_bitrate_pos;
 
     uint8_t b_write_length;
-    int64_t i_init_delta;
-    int64_t i_prev_timestamps[2];
+    int64_t i_prev_dts;
+    int64_t i_prev_pts;
 
     int i_timebase_num;
     int i_timebase_den;
@@ -146,10 +144,8 @@ static int set_param( hnd_t handle, x264_param_t *p_param )
     p_flv->i_fps_den = p_param->i_fps_den;
     p_flv->i_timebase_num = p_param->i_timebase_num;
     p_flv->i_timebase_den = p_param->i_timebase_den;
-    p_flv->i_init_delay = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0;
     p_flv->b_vfr_input = p_param->b_vfr_input;
 
-
     return 0;
 }
 
@@ -216,45 +212,29 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
     flv_hnd_t *p_flv = handle;
     flv_buffer *c = p_flv->c;
 
-    int64_t dts;
-    int64_t cts;
-    int64_t offset;
-
-    if( !p_flv->i_framenum )
-        p_flv->i_delay_time = p_picture->i_dts;
+    int64_t dts = (int64_t)( (p_picture->i_dts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+    int64_t cts = (int64_t)( (p_picture->i_pts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+    int64_t offset = cts - dts;
 
-    if( !p_flv->i_init_delay )
-        dts = cts = (int64_t)((p_picture->i_pts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
-    else
+    if( p_flv->i_framenum )
     {
-        // Use DTS compression
-        dts = p_picture->i_dts - p_flv->i_delay_time;
-
-        if( p_flv->i_framenum == 1 )
-            p_flv->i_init_delta = p_picture->i_dts - p_flv->i_delay_time;
-
-        if( p_flv->i_framenum > p_flv->i_init_delay )
+        int64_t prev_dts = (int64_t)( (p_flv->i_prev_dts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+        int64_t prev_cts = (int64_t)( (p_flv->i_prev_pts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+        if( prev_dts == dts )
         {
-            dts = p_flv->i_prev_timestamps[ (p_flv->i_framenum - p_flv->i_init_delay) % p_flv->i_init_delay ];
-            dts = (int64_t)((dts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
+            double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_dts - p_flv->i_prev_dts);
+            fprintf( stderr, "flv [warning]: duplicate DTS %"PRId64" generated by rounding\n"
+                             "               current internal decoding framerate: %.6f fps\n", dts, fps );
         }
-        else if( p_flv->i_init_delta )
+        if( prev_cts == cts )
         {
-            // Compressed DTSs might not fit in input timescale
-            double compressed_dts;
-            compressed_dts = (p_flv->i_framenum * ((double)p_flv->i_init_delta / (2 * p_flv->i_init_delay)));
-            dts = (int64_t)((compressed_dts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
+            double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_pts - p_flv->i_prev_pts);
+            fprintf( stderr, "flv [warning]: duplicate CTS %"PRId64" is generated by rounding\n"
+                             "               current internal composition framerate: %.6f fps\n", cts, fps );
         }
-
-        p_flv->i_prev_timestamps[ p_flv->i_framenum % p_flv->i_init_delay ] = p_picture->i_dts - p_flv->i_delay_time;
-
-        cts = p_picture->i_pts;
-        cts = (int64_t)((cts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
-     }
-
-    offset = cts - dts;
-
-    assert( cts >= dts );
+    }
+    p_flv->i_prev_dts = p_picture->i_dts;
+    p_flv->i_prev_pts = p_picture->i_pts;
 
     // A new frame - write packet header
     x264_put_byte( c, FLV_TAG_TYPE_VIDEO );
index 7889e4fe25fbd42ae54c337b47a8c7e365f3cba2..e3ad9c61dea2b129a36439d707d108a43d9e2f0b 100644 (file)
@@ -34,11 +34,7 @@ typedef struct
     int i_time_res;
     int64_t i_time_inc;
     int i_numframe;
-    int i_init_delay;
     int i_delay_time;
-
-    int64_t i_prev_timestamps[2];
-    int64_t i_init_delta;
 } mp4_hnd_t;
 
 static void recompute_bitrate_mp4( GF_ISOFile *p_file, int i_track )
@@ -195,8 +191,6 @@ static int set_param( hnd_t handle, x264_param_t *p_param )
     p_mp4->i_time_res = p_param->i_timebase_den;
     p_mp4->i_time_inc = p_param->i_timebase_num;
 
-    p_mp4->i_init_delay = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0;
-
     p_mp4->i_track = gf_isom_new_track( p_mp4->p_file, 0, GF_ISOM_MEDIA_VISUAL,
                                         p_mp4->i_time_res );
 
@@ -282,7 +276,6 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
     mp4_hnd_t *p_mp4 = handle;
     int64_t dts;
     int64_t cts;
-    int32_t offset = 0;
 
     memcpy( p_mp4->p_sample->data + p_mp4->p_sample->dataLength, p_nalu, i_size );
     p_mp4->p_sample->dataLength += i_size;
@@ -290,27 +283,12 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
     if( !p_mp4->i_numframe )
         p_mp4->i_delay_time = p_picture->i_dts * -1;
 
-    if( !p_mp4->i_init_delay )
-        dts = cts = p_picture->i_pts * p_mp4->i_time_inc;
-    else
-    {
-        if( p_mp4->i_numframe <= p_mp4->i_init_delay )
-            dts = p_picture->i_dts + p_mp4->i_delay_time;
-        else
-            dts = p_mp4->i_prev_timestamps[ (p_mp4->i_numframe - p_mp4->i_init_delay) % p_mp4->i_init_delay ] + p_mp4->i_delay_time;
-
-        // unordered pts
-        p_mp4->i_prev_timestamps[ p_mp4->i_numframe % p_mp4->i_init_delay ] = p_picture->i_dts + p_mp4->i_delay_time;
-
-        dts *= p_mp4->i_time_inc;
-        cts = (p_picture->i_pts + p_mp4->i_delay_time) * p_mp4->i_time_inc;
-
-        offset = cts - dts;
-    }
+    dts = (p_picture->i_dts + p_mp4->i_delay_time) * p_mp4->i_time_inc;
+    cts = (p_picture->i_pts + p_mp4->i_delay_time) * p_mp4->i_time_inc;
 
     p_mp4->p_sample->IsRAP = p_picture->b_keyframe;
     p_mp4->p_sample->DTS = dts;
-    p_mp4->p_sample->CTS_Offset = offset;
+    p_mp4->p_sample->CTS_Offset = (uint32_t)(cts - dts);
     gf_isom_add_sample( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_mp4->p_sample );
 
     p_mp4->p_sample->dataLength = 0;
diff --git a/x264.c b/x264.c
index db3353689938c47a570987a501843811538f20b2..8669cb33259e588b4e9c9ee202374fb9c06be7c2 100644 (file)
--- a/x264.c
+++ b/x264.c
@@ -683,6 +683,7 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
         output = mp4_output;
         param->b_annexb = 0;
         param->b_aud = 0;
+        param->b_dts_compress = 0;
         param->b_repeat_headers = 0;
 #else
         fprintf( stderr, "x264 [error]: not compiled with MP4 output support\n" );
@@ -694,6 +695,7 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
         output = mkv_output;
         param->b_annexb = 0;
         param->b_aud = 0;
+        param->b_dts_compress = 0;
         param->b_repeat_headers = 0;
     }
     else if( !strcasecmp( ext, "flv" ) )
@@ -701,6 +703,7 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
         output = flv_output;
         param->b_annexb = 0;
         param->b_aud = 0;
+        param->b_dts_compress = 1;
         param->b_repeat_headers = 0;
     }
     else
@@ -1455,6 +1458,8 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
     int64_t second_largest_pts = -1;
     int64_t ticks_per_frame;
     double  duration;
+    int     prev_timebase_den = param->i_timebase_den;
+    int     dts_compress_multiplier;
 
     opt->b_progress &= param->i_log_level < X264_LOG_DEBUG;
     i_frame_total = input.get_frame_total( opt->hin );
@@ -1474,6 +1479,8 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
 
     x264_encoder_parameters( h, param );
 
+    dts_compress_multiplier = param->i_timebase_den / prev_timebase_den;
+
     if( output.set_param( opt->hout, param ) )
     {
         fprintf( stderr, "x264 [error]: can't set outfile param\n" );
@@ -1528,7 +1535,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
             {
                 if( h->param.i_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
                     fprintf( stderr, "x264 [warning]: non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
-                             i_frame, pic.i_pts, largest_pts );
+                             i_frame, pic.i_pts * dts_compress_multiplier, largest_pts * dts_compress_multiplier );
                 else if( pts_warning_cnt == MAX_PTS_WARNING )
                     fprintf( stderr, "x264 [warning]: too many nonmonotonic pts warnings, suppressing further ones\n" );
                 pts_warning_cnt++;
@@ -1583,6 +1590,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
         duration = (double)param->i_fps_den / param->i_fps_num;
     else
         duration = (double)(2 * largest_pts - second_largest_pts) * param->i_timebase_num / param->i_timebase_den;
+    duration *= dts_compress_multiplier;
 
     i_end = x264_mdate();
     input.picture_clean( &pic );
diff --git a/x264.h b/x264.h
index 1223df78961d4e833b91a3fe62772c513d5e2a28..2550864068bef60134bf15ce79b2e222f9625802 100644 (file)
--- a/x264.h
+++ b/x264.h
@@ -35,7 +35,7 @@
 
 #include <stdarg.h>
 
-#define X264_BUILD 83
+#define X264_BUILD 84
 
 /* x264_t:
  *      opaque handler for encoder */
@@ -316,6 +316,9 @@ typedef struct x264_param_t
     int b_vfr_input;            /* VFR input */
     int i_timebase_num;         /* Timebase numerator */
     int i_timebase_den;         /* Timebase denominator */
+    int b_dts_compress;         /* DTS compression: this algorithm eliminates negative DTS
+                                 * by compressing them to be less than the second PTS.
+                                 * Warning: this will change the timebase! */
 
     /* Slicing parameters */
     int i_slice_max_size;    /* Max size per slice in bytes; includes estimated NAL overhead. */