+ int idx = !b_intra;
+ int last_nonb, cur_nonb = 1;
+ int bframes = 0;
+ int i = num_frames;
+
+ if( b_intra )
+ x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 );
+
+ while( i > 0 && frames[i]->i_type == X264_TYPE_B )
+ i--;
+ last_nonb = i;
+
+ /* Lookaheadless MB-tree is not a theoretically distinct case; the same extrapolation could
+ * be applied to the end of a lookahead buffer of any size. However, it's most needed when
+ * lookahead=0, so that's what's currently implemented. */
+ if( !h->param.rc.i_lookahead )
+ {
+ if( b_intra )
+ {
+ memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
+ memcpy( frames[0]->f_qp_offset, frames[0]->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
+ return;
+ }
+ XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
+ memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
+ }
+ else
+ {
+ if( last_nonb < idx )
+ return;
+ memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
+ }
+
+ while( i-- > idx )
+ {
+ cur_nonb = i;
+ while( frames[cur_nonb]->i_type == X264_TYPE_B && cur_nonb > 0 )
+ cur_nonb--;
+ if( cur_nonb < idx )
+ break;
+ x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb, 0 );
+ memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
+ bframes = last_nonb - cur_nonb - 1;
+ if( h->param.i_bframe_pyramid && bframes > 1 )
+ {
+ int middle = (bframes + 1)/2 + cur_nonb;
+ x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, middle, 0 );
+ memset( frames[middle]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
+ while( i > cur_nonb )
+ {
+ int p0 = i > middle ? middle : cur_nonb;
+ int p1 = i < middle ? middle : last_nonb;
+ if( i != middle )
+ {
+ x264_slicetype_frame_cost( h, a, frames, p0, p1, i, 0 );
+ x264_macroblock_tree_propagate( h, frames, p0, p1, i, 0 );
+ }
+ i--;
+ }
+ x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, middle, 1 );
+ }
+ else
+ {
+ while( i > cur_nonb )
+ {
+ x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 );
+ x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i, 0 );
+ i--;
+ }
+ }
+ x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb, 1 );
+ last_nonb = cur_nonb;
+ }
+
+ if( !h->param.rc.i_lookahead )
+ {
+ x264_macroblock_tree_propagate( h, frames, 0, last_nonb, last_nonb, 1 );
+ XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
+ }
+
+ x264_macroblock_tree_finish( h, frames[last_nonb], last_nonb );
+ if( h->param.i_bframe_pyramid && bframes > 1 && !h->param.rc.i_vbv_buffer_size )
+ x264_macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], 0 );
+}
+
+static int x264_vbv_frame_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1, int b )
+{
+ int cost = x264_slicetype_frame_cost( h, a, frames, p0, p1, b, 0 );
+ if( h->param.rc.i_aq_mode )
+ {
+ if( h->param.rc.b_mb_tree )
+ return x264_slicetype_frame_cost_recalculate( h, frames, p0, p1, b );
+ else
+ return frames[b]->i_cost_est_aq[b-p0][p1-b];
+ }
+ return cost;
+}
+
+static void x264_calculate_durations( x264_t *h, x264_frame_t *cur_frame, x264_frame_t *prev_frame, int *i_cpb_delay, int *i_coded_fields )
+{
+ cur_frame->i_cpb_delay = *i_cpb_delay;
+ cur_frame->i_dpb_output_delay = cur_frame->i_field_cnt - *i_coded_fields;
+
+ // add a correction term for frame reordering
+ cur_frame->i_dpb_output_delay += h->sps->vui.i_num_reorder_frames*2;
+
+ // fix possible negative dpb_output_delay because of pulldown changes and reordering
+ if( cur_frame->i_dpb_output_delay < 0 )
+ {
+ cur_frame->i_cpb_delay += cur_frame->i_dpb_output_delay;
+ cur_frame->i_dpb_output_delay = 0;
+ if( prev_frame )
+ prev_frame->i_cpb_duration += cur_frame->i_dpb_output_delay;
+ }
+
+ if( cur_frame->b_keyframe )
+ *i_cpb_delay = 0;
+
+ *i_cpb_delay += cur_frame->i_duration;
+ *i_coded_fields += cur_frame->i_duration;
+ cur_frame->i_cpb_duration = cur_frame->i_duration;
+}
+
+static void x264_vbv_lookahead( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int keyframe )
+{
+ int last_nonb = 0, cur_nonb = 1, idx = 0;
+ x264_frame_t *prev_frame = NULL;
+ int prev_frame_idx = 0;
+ while( cur_nonb < num_frames && frames[cur_nonb]->i_type == X264_TYPE_B )
+ cur_nonb++;
+ int next_nonb = keyframe ? last_nonb : cur_nonb;
+
+ if( frames[cur_nonb]->i_coded_fields_lookahead >= 0 )
+ {
+ h->i_coded_fields_lookahead = frames[cur_nonb]->i_coded_fields_lookahead;
+ h->i_cpb_delay_lookahead = frames[cur_nonb]->i_cpb_delay_lookahead;
+ }
+
+ while( cur_nonb < num_frames )
+ {
+ /* P/I cost: This shouldn't include the cost of next_nonb */
+ if( next_nonb != cur_nonb )
+ {
+ int p0 = IS_X264_TYPE_I( frames[cur_nonb]->i_type ) ? cur_nonb : last_nonb;
+ frames[next_nonb]->i_planned_satd[idx] = x264_vbv_frame_cost( h, a, frames, p0, cur_nonb, cur_nonb );
+ frames[next_nonb]->i_planned_type[idx] = frames[cur_nonb]->i_type;
+ frames[cur_nonb]->i_coded_fields_lookahead = h->i_coded_fields_lookahead;
+ frames[cur_nonb]->i_cpb_delay_lookahead = h->i_cpb_delay_lookahead;
+ x264_calculate_durations( h, frames[cur_nonb], prev_frame, &h->i_cpb_delay_lookahead, &h->i_coded_fields_lookahead );
+ if( prev_frame )
+ {
+ frames[next_nonb]->f_planned_cpb_duration[prev_frame_idx] = (double)prev_frame->i_cpb_duration *
+ h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
+ }
+ frames[next_nonb]->f_planned_cpb_duration[idx] = (double)frames[cur_nonb]->i_cpb_duration *
+ h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
+ prev_frame = frames[cur_nonb];
+ prev_frame_idx = idx;
+ idx++;
+ }
+ /* Handle the B-frames: coded order */
+ for( int i = last_nonb+1; i < cur_nonb; i++, idx++ )
+ {
+ frames[next_nonb]->i_planned_satd[idx] = x264_vbv_frame_cost( h, a, frames, last_nonb, cur_nonb, i );
+ frames[next_nonb]->i_planned_type[idx] = X264_TYPE_B;
+ frames[i]->i_coded_fields_lookahead = h->i_coded_fields_lookahead;
+ frames[i]->i_cpb_delay_lookahead = h->i_cpb_delay_lookahead;
+ x264_calculate_durations( h, frames[i], prev_frame, &h->i_cpb_delay_lookahead, &h->i_coded_fields_lookahead );
+ if( prev_frame )
+ {
+ frames[next_nonb]->f_planned_cpb_duration[prev_frame_idx] = (double)prev_frame->i_cpb_duration *
+ h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
+ }
+ frames[next_nonb]->f_planned_cpb_duration[idx] = (double)frames[i]->i_cpb_duration *
+ h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
+ prev_frame = frames[i];
+ prev_frame_idx = idx;
+ }
+ last_nonb = cur_nonb;
+ cur_nonb++;
+ while( cur_nonb <= num_frames && frames[cur_nonb]->i_type == X264_TYPE_B )
+ cur_nonb++;
+ }
+ frames[next_nonb]->i_planned_type[idx] = X264_TYPE_AUTO;
+}
+
+static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, int threshold )
+{
+ int loc = 1;
+ int cost = 0;
+ int cur_p = 0;
+ path--; /* Since the 1st path element is really the second frame */
+ while( path[loc] )
+ {
+ int next_p = loc;
+ /* Find the location of the next P-frame. */
+ while( path[next_p] != 'P' )
+ next_p++;
+
+ /* Add the cost of the P-frame found above */
+ cost += x264_slicetype_frame_cost( h, a, frames, cur_p, next_p, next_p, 0 );
+ /* Early terminate if the cost we have found is larger than the best path cost so far */
+ if( cost > threshold )
+ break;
+
+ if( h->param.i_bframe_pyramid && next_p - cur_p > 2 )
+ {
+ int middle = cur_p + (next_p - cur_p)/2;
+ cost += x264_slicetype_frame_cost( h, a, frames, cur_p, next_p, middle, 0 );
+ for( int next_b = loc; next_b < middle && cost < threshold; next_b++ )
+ cost += x264_slicetype_frame_cost( h, a, frames, cur_p, middle, next_b, 0 );
+ for( int next_b = middle+1; next_b < next_p && cost < threshold; next_b++ )
+ cost += x264_slicetype_frame_cost( h, a, frames, middle, next_p, next_b, 0 );
+ }
+ else
+ for( int next_b = loc; next_b < next_p && cost < threshold; next_b++ )
+ cost += x264_slicetype_frame_cost( h, a, frames, cur_p, next_p, next_b, 0 );
+
+ loc = next_p + 1;
+ cur_p = next_p;
+ }
+ return cost;
+}
+
+/* Viterbi/trellis slicetype decision algorithm. */
+/* Uses strings due to the fact that the speed of the control functions is
+ negligible compared to the cost of running slicetype_frame_cost, and because
+ it makes debugging easier. */
+static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, char (*best_paths)[X264_LOOKAHEAD_MAX] )
+{
+ char paths[2][X264_LOOKAHEAD_MAX];
+ int num_paths = X264_MIN( h->param.i_bframe+1, length );
+ int best_cost = COST_MAX;
+ int idx = 0;
+
+ /* Iterate over all currently possible paths */
+ for( int path = 0; path < num_paths; path++ )
+ {
+ /* Add suffixes to the current path */
+ int len = length - (path + 1);
+ memcpy( paths[idx], best_paths[len % (X264_BFRAME_MAX+1)], len );
+ memset( paths[idx]+len, 'B', path );
+ strcpy( paths[idx]+len+path, "P" );
+
+ /* Calculate the actual cost of the current path */
+ int cost = x264_slicetype_path_cost( h, a, frames, paths[idx], best_cost );
+ if( cost < best_cost )
+ {
+ best_cost = cost;
+ idx ^= 1;
+ }
+ }
+
+ /* Store the best path. */
+ memcpy( best_paths[length % (X264_BFRAME_MAX+1)], paths[idx^1], length );
+}
+
+static int scenecut_internal( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1, int print )
+{
+ x264_frame_t *frame = frames[p1];
+ x264_slicetype_frame_cost( h, a, frames, p0, p1, p1, 0 );
+