From afc36d0b0ff867541827e3ff0f517df4cdf31fd6 Mon Sep 17 00:00:00 2001
From: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
Date: Tue, 26 Jan 2010 16:01:54 -0800
Subject: [PATCH] Improve DTS generation, move DTS compression into libx264
 This change fixes some cases in which PTS could be less than DTS.

Additionally, a new parameter, b_dts_compress, enables DTS compression.
DTS compression eliminates negative DTS (i.e. initial delay) due to B-frames.
The algorithm changes timebase in order to avoid duplicating DTS.
Currently, in the x264 CLI, only the FLV muxer uses it.  The MP4 muxer doesn't need it, as it uses an Edit Box instead.
---
 common/common.c     |  1 +
 common/common.h     |  5 ++++
 common/frame.c      |  2 +-
 common/frame.h      |  2 +-
 encoder/encoder.c   | 39 ++++++++++++++++++++++++++++--
 encoder/slicetype.c |  4 ++--
 output/flv.c        | 58 +++++++++++++++------------------------------
 output/mp4.c        | 28 +++-------------------
 x264.c              | 10 +++++++-
 x264.h              |  5 +++-
 10 files changed, 82 insertions(+), 72 deletions(-)

diff --git a/common/common.c b/common/common.c
index 9eed5c37..b454e37e 100644
--- a/common/common.c
+++ b/common/common.c
@@ -157,6 +157,7 @@ void    x264_param_default( x264_param_t *param )
     param->b_annexb = 1;
     param->b_aud = 0;
     param->b_vfr_input = 1;
+    param->b_dts_compress = 0;
 }
 
 static int parse_enum( const char *arg, const char * const *names, int *dst )
diff --git a/common/common.h b/common/common.h
index 0f16e0a0..ca153304 100644
--- a/common/common.h
+++ b/common/common.h
@@ -376,6 +376,9 @@ struct x264_t
     x264_pps_t      *pps;
     int             i_idr_pic_id;
 
+    /* Timebase multiplier for DTS compression */
+    int             i_dts_compress_multiplier;
+
     /* quantization matrix for decoding, [cqm][qp%6][coef] */
     int             (*dequant4_mf[4])[16];   /* [4][6][16] */
     int             (*dequant8_mf[2])[64];   /* [2][6][64] */
@@ -429,6 +432,8 @@ struct x264_t
         int i_delay;    /* Number of frames buffered for B reordering */
         int     i_bframe_delay;
         int64_t i_bframe_delay_time;
+        int64_t i_init_delta;
+        int64_t i_prev_dts[2];
         int b_have_lowres;  /* Whether 1/2 resolution luma planes are being used */
         int b_have_sub8x8_esa;
     } frames;
diff --git a/common/frame.c b/common/frame.c
index e01d7793..e7003fdf 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -223,7 +223,7 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
 
     dst->i_type     = src->i_type;
     dst->i_qpplus1  = src->i_qpplus1;
-    dst->i_pts      = dst->i_dts = src->i_pts;
+    dst->i_pts      = dst->i_reordered_pts = src->i_pts;
     dst->param      = src->param;
 
     for( i=0; i<3; i++ )
diff --git a/common/frame.h b/common/frame.h
index 786869e5..b1852b33 100644
--- a/common/frame.h
+++ b/common/frame.h
@@ -35,7 +35,7 @@ typedef struct x264_frame
     int     i_type;
     int     i_qpplus1;
     int64_t i_pts;
-    int64_t i_dts;
+    int64_t i_reordered_pts;
     x264_param_t *param;
 
     int     i_frame;     /* Presentation frame number */
diff --git a/encoder/encoder.c b/encoder/encoder.c
index d69cf520..8d3bc344 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -863,6 +863,18 @@ x264_t *x264_encoder_open( x264_param_t *param )
     h->i_frame = -1;
     h->i_frame_num = 0;
     h->i_idr_pic_id = 0;
+    if( h->param.b_dts_compress )
+    {
+        /* h->i_dts_compress_multiplier == h->frames.i_bframe_delay + 1 */
+        h->i_dts_compress_multiplier = h->param.i_bframe ? (h->param.i_bframe_pyramid ? 3 : 2) : 1;
+        if( h->i_dts_compress_multiplier != 1 )
+            x264_log( h, X264_LOG_DEBUG, "DTS compresion changed timebase: %d/%d -> %d/%d\n",
+                      h->param.i_timebase_num, h->param.i_timebase_den,
+                      h->param.i_timebase_num, h->param.i_timebase_den * h->i_dts_compress_multiplier );
+        h->param.i_timebase_den *= h->i_dts_compress_multiplier;
+    }
+    else
+        h->i_dts_compress_multiplier = 1;
 
     h->sps = &h->sps_array[0];
     x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
@@ -2388,8 +2400,31 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
         pic_out->i_type = X264_TYPE_B;
 
     pic_out->b_keyframe = h->fenc->b_keyframe;
-    pic_out->i_pts = h->fenc->i_pts;
-    pic_out->i_dts = h->fenc->i_dts - h->frames.i_bframe_delay_time;
+
+    pic_out->i_pts = h->fenc->i_pts *= h->i_dts_compress_multiplier;
+    if( h->frames.i_bframe_delay )
+    {
+        int64_t *i_prev_dts = thread_current->frames.i_prev_dts;
+        if( h->i_frame <= h->frames.i_bframe_delay )
+        {
+            if( h->i_dts_compress_multiplier == 1 )
+                pic_out->i_dts = h->fenc->i_reordered_pts - h->frames.i_bframe_delay_time;
+            else
+            {
+                /* DTS compression */
+                if( h->i_frame == 1 )
+                    thread_current->frames.i_init_delta = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
+                pic_out->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier;
+            }
+        }
+        else
+            pic_out->i_dts = i_prev_dts[ (h->i_frame - h->frames.i_bframe_delay) % h->frames.i_bframe_delay ];
+        i_prev_dts[ h->i_frame % h->frames.i_bframe_delay ] = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
+    }
+    else
+        pic_out->i_dts = h->fenc->i_reordered_pts;
+    assert( pic_out->i_pts >= pic_out->i_dts );
+
     pic_out->img.i_plane = h->fdec->i_plane;
     for(i = 0; i < 3; i++)
     {
diff --git a/encoder/slicetype.c b/encoder/slicetype.c
index 18309e4a..c46aee5e 100644
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -1336,10 +1336,10 @@ void x264_slicetype_decide( x264_t *h )
         {
             int idx = index[h->lookahead->next.list[i]->i_type == X264_TYPE_BREF]++;
             frames[idx] = h->lookahead->next.list[i];
-            frames[idx]->i_dts = h->lookahead->next.list[idx]->i_pts;
+            frames[idx]->i_reordered_pts = h->lookahead->next.list[idx]->i_pts;
         }
         frames[0] = h->lookahead->next.list[bframes];
-        frames[0]->i_dts = h->lookahead->next.list[0]->i_pts;
+        frames[0]->i_reordered_pts = h->lookahead->next.list[0]->i_pts;
         memcpy( h->lookahead->next.list, frames, (bframes+1) * sizeof(x264_frame_t*) );
     }
     for( i = 0; i <= bframes; i++ )
diff --git a/output/flv.c b/output/flv.c
index 8a937cf1..d73466b5 100644
--- a/output/flv.c
+++ b/output/flv.c
@@ -37,8 +37,6 @@ typedef struct
     int64_t i_fps_num;
     int64_t i_fps_den;
     int64_t i_framenum;
-    int     i_init_delay;
-    int     i_delay_time;
 
     uint64_t i_framerate_pos;
     uint64_t i_duration_pos;
@@ -46,8 +44,8 @@ typedef struct
     uint64_t i_bitrate_pos;
 
     uint8_t b_write_length;
-    int64_t i_init_delta;
-    int64_t i_prev_timestamps[2];
+    int64_t i_prev_dts;
+    int64_t i_prev_pts;
 
     int i_timebase_num;
     int i_timebase_den;
@@ -146,10 +144,8 @@ static int set_param( hnd_t handle, x264_param_t *p_param )
     p_flv->i_fps_den = p_param->i_fps_den;
     p_flv->i_timebase_num = p_param->i_timebase_num;
     p_flv->i_timebase_den = p_param->i_timebase_den;
-    p_flv->i_init_delay = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0;
     p_flv->b_vfr_input = p_param->b_vfr_input;
 
-
     return 0;
 }
 
@@ -216,45 +212,29 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
     flv_hnd_t *p_flv = handle;
     flv_buffer *c = p_flv->c;
 
-    int64_t dts;
-    int64_t cts;
-    int64_t offset;
-
-    if( !p_flv->i_framenum )
-        p_flv->i_delay_time = p_picture->i_dts;
+    int64_t dts = (int64_t)( (p_picture->i_dts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+    int64_t cts = (int64_t)( (p_picture->i_pts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+    int64_t offset = cts - dts;
 
-    if( !p_flv->i_init_delay )
-        dts = cts = (int64_t)((p_picture->i_pts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
-    else
+    if( p_flv->i_framenum )
     {
-        // Use DTS compression
-        dts = p_picture->i_dts - p_flv->i_delay_time;
-
-        if( p_flv->i_framenum == 1 )
-            p_flv->i_init_delta = p_picture->i_dts - p_flv->i_delay_time;
-
-        if( p_flv->i_framenum > p_flv->i_init_delay )
+        int64_t prev_dts = (int64_t)( (p_flv->i_prev_dts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+        int64_t prev_cts = (int64_t)( (p_flv->i_prev_pts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+        if( prev_dts == dts )
         {
-            dts = p_flv->i_prev_timestamps[ (p_flv->i_framenum - p_flv->i_init_delay) % p_flv->i_init_delay ];
-            dts = (int64_t)((dts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
+            double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_dts - p_flv->i_prev_dts);
+            fprintf( stderr, "flv [warning]: duplicate DTS %"PRId64" generated by rounding\n"
+                             "               current internal decoding framerate: %.6f fps\n", dts, fps );
         }
-        else if( p_flv->i_init_delta )
+        if( prev_cts == cts )
         {
-            // Compressed DTSs might not fit in input timescale
-            double compressed_dts;
-            compressed_dts = (p_flv->i_framenum * ((double)p_flv->i_init_delta / (2 * p_flv->i_init_delay)));
-            dts = (int64_t)((compressed_dts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
+            double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_pts - p_flv->i_prev_pts);
+            fprintf( stderr, "flv [warning]: duplicate CTS %"PRId64" is generated by rounding\n"
+                             "               current internal composition framerate: %.6f fps\n", cts, fps );
         }
-
-        p_flv->i_prev_timestamps[ p_flv->i_framenum % p_flv->i_init_delay ] = p_picture->i_dts - p_flv->i_delay_time;
-
-        cts = p_picture->i_pts;
-        cts = (int64_t)((cts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
-     }
-
-    offset = cts - dts;
-
-    assert( cts >= dts );
+    }
+    p_flv->i_prev_dts = p_picture->i_dts;
+    p_flv->i_prev_pts = p_picture->i_pts;
 
     // A new frame - write packet header
     x264_put_byte( c, FLV_TAG_TYPE_VIDEO );
diff --git a/output/mp4.c b/output/mp4.c
index 7889e4fe..e3ad9c61 100644
--- a/output/mp4.c
+++ b/output/mp4.c
@@ -34,11 +34,7 @@ typedef struct
     int i_time_res;
     int64_t i_time_inc;
     int i_numframe;
-    int i_init_delay;
     int i_delay_time;
-
-    int64_t i_prev_timestamps[2];
-    int64_t i_init_delta;
 } mp4_hnd_t;
 
 static void recompute_bitrate_mp4( GF_ISOFile *p_file, int i_track )
@@ -195,8 +191,6 @@ static int set_param( hnd_t handle, x264_param_t *p_param )
     p_mp4->i_time_res = p_param->i_timebase_den;
     p_mp4->i_time_inc = p_param->i_timebase_num;
 
-    p_mp4->i_init_delay = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0;
-
     p_mp4->i_track = gf_isom_new_track( p_mp4->p_file, 0, GF_ISOM_MEDIA_VISUAL,
                                         p_mp4->i_time_res );
 
@@ -282,7 +276,6 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
     mp4_hnd_t *p_mp4 = handle;
     int64_t dts;
     int64_t cts;
-    int32_t offset = 0;
 
     memcpy( p_mp4->p_sample->data + p_mp4->p_sample->dataLength, p_nalu, i_size );
     p_mp4->p_sample->dataLength += i_size;
@@ -290,27 +283,12 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
     if( !p_mp4->i_numframe )
         p_mp4->i_delay_time = p_picture->i_dts * -1;
 
-    if( !p_mp4->i_init_delay )
-        dts = cts = p_picture->i_pts * p_mp4->i_time_inc;
-    else
-    {
-        if( p_mp4->i_numframe <= p_mp4->i_init_delay )
-            dts = p_picture->i_dts + p_mp4->i_delay_time;
-        else
-            dts = p_mp4->i_prev_timestamps[ (p_mp4->i_numframe - p_mp4->i_init_delay) % p_mp4->i_init_delay ] + p_mp4->i_delay_time;
-
-        // unordered pts
-        p_mp4->i_prev_timestamps[ p_mp4->i_numframe % p_mp4->i_init_delay ] = p_picture->i_dts + p_mp4->i_delay_time;
-
-        dts *= p_mp4->i_time_inc;
-        cts = (p_picture->i_pts + p_mp4->i_delay_time) * p_mp4->i_time_inc;
-
-        offset = cts - dts;
-    }
+    dts = (p_picture->i_dts + p_mp4->i_delay_time) * p_mp4->i_time_inc;
+    cts = (p_picture->i_pts + p_mp4->i_delay_time) * p_mp4->i_time_inc;
 
     p_mp4->p_sample->IsRAP = p_picture->b_keyframe;
     p_mp4->p_sample->DTS = dts;
-    p_mp4->p_sample->CTS_Offset = offset;
+    p_mp4->p_sample->CTS_Offset = (uint32_t)(cts - dts);
     gf_isom_add_sample( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_mp4->p_sample );
 
     p_mp4->p_sample->dataLength = 0;
diff --git a/x264.c b/x264.c
index db335368..8669cb33 100644
--- a/x264.c
+++ b/x264.c
@@ -683,6 +683,7 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
         output = mp4_output;
         param->b_annexb = 0;
         param->b_aud = 0;
+        param->b_dts_compress = 0;
         param->b_repeat_headers = 0;
 #else
         fprintf( stderr, "x264 [error]: not compiled with MP4 output support\n" );
@@ -694,6 +695,7 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
         output = mkv_output;
         param->b_annexb = 0;
         param->b_aud = 0;
+        param->b_dts_compress = 0;
         param->b_repeat_headers = 0;
     }
     else if( !strcasecmp( ext, "flv" ) )
@@ -701,6 +703,7 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
         output = flv_output;
         param->b_annexb = 0;
         param->b_aud = 0;
+        param->b_dts_compress = 1;
         param->b_repeat_headers = 0;
     }
     else
@@ -1455,6 +1458,8 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
     int64_t second_largest_pts = -1;
     int64_t ticks_per_frame;
     double  duration;
+    int     prev_timebase_den = param->i_timebase_den;
+    int     dts_compress_multiplier;
 
     opt->b_progress &= param->i_log_level < X264_LOG_DEBUG;
     i_frame_total = input.get_frame_total( opt->hin );
@@ -1474,6 +1479,8 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
 
     x264_encoder_parameters( h, param );
 
+    dts_compress_multiplier = param->i_timebase_den / prev_timebase_den;
+
     if( output.set_param( opt->hout, param ) )
     {
         fprintf( stderr, "x264 [error]: can't set outfile param\n" );
@@ -1528,7 +1535,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
             {
                 if( h->param.i_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
                     fprintf( stderr, "x264 [warning]: non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
-                             i_frame, pic.i_pts, largest_pts );
+                             i_frame, pic.i_pts * dts_compress_multiplier, largest_pts * dts_compress_multiplier );
                 else if( pts_warning_cnt == MAX_PTS_WARNING )
                     fprintf( stderr, "x264 [warning]: too many nonmonotonic pts warnings, suppressing further ones\n" );
                 pts_warning_cnt++;
@@ -1583,6 +1590,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
         duration = (double)param->i_fps_den / param->i_fps_num;
     else
         duration = (double)(2 * largest_pts - second_largest_pts) * param->i_timebase_num / param->i_timebase_den;
+    duration *= dts_compress_multiplier;
 
     i_end = x264_mdate();
     input.picture_clean( &pic );
diff --git a/x264.h b/x264.h
index 1223df78..25508640 100644
--- a/x264.h
+++ b/x264.h
@@ -35,7 +35,7 @@
 
 #include <stdarg.h>
 
-#define X264_BUILD 83
+#define X264_BUILD 84
 
 /* x264_t:
  *      opaque handler for encoder */
@@ -316,6 +316,9 @@ typedef struct x264_param_t
     int b_vfr_input;            /* VFR input */
     int i_timebase_num;         /* Timebase numerator */
     int i_timebase_den;         /* Timebase denominator */
+    int b_dts_compress;         /* DTS compression: this algorithm eliminates negative DTS
+                                 * by compressing them to be less than the second PTS.
+                                 * Warning: this will change the timebase! */
 
     /* Slicing parameters */
     int i_slice_max_size;    /* Max size per slice in bytes; includes estimated NAL overhead. */
-- 
2.39.2