VFR/framerate-aware ratecontrol, part 2

author Fiona Glaser <fiona@x264.com>

Fri, 24 Dec 2010 00:33:01 +0000 (19:33 -0500)

committer Fiona Glaser <fiona@x264.com>

Mon, 10 Jan 2011 20:28:08 +0000 (12:28 -0800)
author Fiona Glaser <fiona@x264.com>
Fri, 24 Dec 2010 00:33:01 +0000 (19:33 -0500)
committer Fiona Glaser <fiona@x264.com>
Mon, 10 Jan 2011 20:28:08 +0000 (12:28 -0800)
diff --git a/common/mc.c b/common/mc.c

index 5f8c260bdcaa8b4d5520aa8d4eec52a20f5f4604..b0b38e92e651c26edb30a0e3de5f71e9a27d57b4 100644 (file)
--- a/common/mc.c
+++ b/common/mc.c
@@ -431,30 +431,19 @@ static void frame_init_lowres_core( pixel *src0, pixel *dst0, pixel *dsth, pixel
      }
  }
  
-#if defined(__GNUC__) && (ARCH_X86 || ARCH_X86_64)
-// gcc isn't smart enough to use the "idiv" instruction
-static ALWAYS_INLINE int32_t div_64_32(int64_t x, int32_t y)
-{
-    int32_t quotient, remainder;
-    asm("idiv %4"
-        :"=a"(quotient), "=d"(remainder)
-        :"a"((uint32_t)x), "d"((int32_t)(x>>32)), "r"(y)
-    );
-    return quotient;
-}
-#else
-#define div_64_32(x,y) ((x)/(y))
-#endif
-
  /* Estimate the total amount of influence on future quality that could be had if we
   * were to improve the reference samples used to inter predict any given macroblock. */
  static void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
-                                   uint16_t *inter_costs, uint16_t *inv_qscales, int len )
+                                   uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
  {
+    float fps = *fps_factor / 256.f;
      for( int i = 0; i < len; i++ )
      {
-        int propagate_amount = propagate_in[i] + ((intra_costs[i] * inv_qscales[i] + 128)>>8);
-        dst[i] = div_64_32((int64_t)propagate_amount * (intra_costs[i] - (inter_costs[i] & LOWRES_COST_MASK)), intra_costs[i]);
+        float intra_cost       = intra_costs[i] * inv_qscales[i];
+        float propagate_amount = propagate_in[i] + intra_cost*fps;
+        float propagate_num    = intra_costs[i] - (inter_costs[i] & LOWRES_COST_MASK);
+        float propagate_denom  = intra_costs[i];
+        dst[i] = (int)(propagate_amount * propagate_num / propagate_denom + 0.5f);
      }
  }
  
diff --git a/common/mc.h b/common/mc.h

index 92d0ded53b454cc07c95b4ce7fe992ac88825499..2a96fa6af56e74fe7e74adc174088f1a20ce47d4 100644 (file)
--- a/common/mc.h
+++ b/common/mc.h
@@ -123,7 +123,7 @@ typedef struct
      void (*weight_cache)( x264_t *, x264_weight_t * );
  
      void (*mbtree_propagate_cost)( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
-                                   uint16_t *inter_costs, uint16_t *inv_qscales, int len );
+                                   uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
  } x264_mc_functions_t;
  
  void x264_mc_init( int cpu, x264_mc_functions_t *pf );
diff --git a/common/x86/const-a.asm b/common/x86/const-a.asm

index d2c7fe69025a105ec80d4b2c503aeae89dd46b3f..f01856d192c0657be7462e7ef11f6df537c12749 100644 (file)
--- a/common/x86/const-a.asm
+++ b/common/x86/const-a.asm
@@ -51,7 +51,6 @@ const pw_pixel_max,times 8 dw ((1 << BIT_DEPTH)-1)
  
  const pd_1,        times 4 dd 1
  const pd_32,       times 4 dd 32
-const pd_128,      times 4 dd 128
  const pd_ffff,     times 4 dd 0xffff
  const pw_00ff,     times 8 dw 0x00ff
  const pw_ff00,     times 8 dw 0xff00
diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm

index 3a1ea14f2db20e5d856b0cc14958cb020d1fbe5d..bb639fe2f764b1f1cde29a5da784d571c0c27946 100644 (file)
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -40,6 +40,7 @@ deinterleave_shuf: db 0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15
  
  pd_16: times 4 dd 16
  pd_0f: times 4 dd 0xffff
+pf_inv256: times 4 dd 0.00390625
  
  pad10: times 8 dw    10*PIXEL_MAX
  pad20: times 8 dw    20*PIXEL_MAX
@@ -59,7 +60,6 @@ cextern pw_32
  cextern pw_00ff
  cextern pw_3fff
  cextern pw_pixel_max
-cextern pd_128
  cextern pd_ffff
  
  %macro LOAD_ADD 4
@@ -1649,47 +1649,49 @@ FRAME_INIT_LOWRES ssse3
  
  ;-----------------------------------------------------------------------------
  ; void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
-;                             uint16_t *inter_costs, uint16_t *inv_qscales, int len )
+;                             uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
  ;-----------------------------------------------------------------------------
-cglobal mbtree_propagate_cost_sse2, 6,6,7
-    shl r5d, 1
-    lea r0, [r0+r5*2]
-    add r1, r5
-    add r2, r5
-    add r3, r5
-    add r4, r5
-    neg r5
-    pxor      xmm5, xmm5
-    movdqa    xmm6, [pw_3fff]
-    movdqa    xmm4, [pd_128]
+cglobal mbtree_propagate_cost_sse2, 7,7,7
+    shl        r6d, 1
+    lea         r0, [r0+r6*2]
+    add         r1, r6
+    add         r2, r6
+    add         r3, r6
+    add         r4, r6
+    neg         r6
+    pxor      xmm4, xmm4
+    movss     xmm6, [r5]
+    shufps    xmm6, xmm6, 0
+    mulps     xmm6, [pf_inv256]
+    movdqa    xmm5, [pw_3fff]
  .loop:
-    movq      xmm2, [r2+r5] ; intra
-    movq      xmm0, [r4+r5] ; invq
-    movq      xmm3, [r3+r5] ; inter
-    movq      xmm1, [r1+r5] ; prop
-    punpcklwd xmm2, xmm5
-    punpcklwd xmm0, xmm5
+    movq      xmm2, [r2+r6] ; intra
+    movq      xmm0, [r4+r6] ; invq
+    movq      xmm3, [r3+r6] ; inter
+    movq      xmm1, [r1+r6] ; prop
+    punpcklwd xmm2, xmm4
+    punpcklwd xmm0, xmm4
      pmaddwd   xmm0, xmm2
-    pand      xmm3, xmm6
-    punpcklwd xmm1, xmm5
-    punpcklwd xmm3, xmm5
-    paddd     xmm0, xmm4
-    psrld     xmm0, 8       ; intra*invq>>8
-    paddd     xmm0, xmm1    ; prop + (intra*invq>>8)
+    pand      xmm3, xmm5
+    punpcklwd xmm1, xmm4
+    punpcklwd xmm3, xmm4
+    cvtdq2ps  xmm0, xmm0
+    mulps     xmm0, xmm6    ; intra*invq*fps_factor>>8
+    cvtdq2ps  xmm1, xmm1    ; prop
+    addps     xmm0, xmm1    ; prop + (intra*invq*fps_factor>>8)
      cvtdq2ps  xmm1, xmm2    ; intra
      psubd     xmm2, xmm3    ; intra - inter
+    cvtdq2ps  xmm2, xmm2    ; intra - inter
      rcpps     xmm3, xmm1    ; 1 / intra 1st approximation
-    cvtdq2ps  xmm0, xmm0
      mulps     xmm1, xmm3    ; intra * (1/intra 1st approx)
-    cvtdq2ps  xmm2, xmm2
      mulps     xmm1, xmm3    ; intra * (1/intra 1st approx)^2
-    mulps     xmm0, xmm2    ; (prop + (intra*invq>>8)) * (intra - inter)
+    mulps     xmm0, xmm2    ; (prop + (intra*invq*fps_factor>>8)) * (intra - inter)
      addps     xmm3, xmm3    ; 2 * (1/intra 1st approx)
      subps     xmm3, xmm1    ; 2nd approximation for 1/intra
      mulps     xmm0, xmm3    ; / intra
-    cvttps2dq xmm0, xmm0    ; truncation isn't really desired, but matches the integer implementation
-    movdqa [r0+r5*2], xmm0
-    add r5, 8
+    cvtps2dq  xmm0, xmm0
+    movdqa [r0+r6*2], xmm0
+    add         r6, 8
      jl .loop
      REP_RET
  
diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c

index 881f2d7770d8aa94862d9d4c57315fa82afd54f4..cdd9d572279a38794342641b8ed59aa5f50e39bc 100644 (file)
--- a/common/x86/mc-c.c
+++ b/common/x86/mc-c.c
@@ -124,7 +124,7 @@ void x264_integral_init8v_mmx( uint16_t *sum8, int stride );
  void x264_integral_init8v_sse2( uint16_t *sum8, int stride );
  void x264_integral_init4v_ssse3( uint16_t *sum8, uint16_t *sum4, int stride );
  void x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
-                                      uint16_t *inter_costs, uint16_t *inv_qscales, int len );
+                                      uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
  
  #define MC_CHROMA(cpu)\
  void x264_mc_chroma_##cpu( pixel *dstu, pixel *dstv, int i_dst,\
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c

index 4de6ab513aeb1f15dae0e3aa22afdd10a108a3b1..2d5871504668886bb39f47ad113f6abc6aa24a07 100644 (file)
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -1603,9 +1603,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
              rc->cplxr_sum += bits * qp2qscale( rc->qpa_rc ) / (rc->last_rceq * fabs( h->param.rc.f_pb_factor ));
          }
          rc->cplxr_sum *= rc->cbr_decay;
-        double frame_duration = (double)h->fenc->i_duration * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
-
-        rc->wanted_bits_window += frame_duration * rc->bitrate;
+        rc->wanted_bits_window += h->fenc->f_duration * rc->bitrate;
          rc->wanted_bits_window *= rc->cbr_decay;
      }
  
@@ -2184,7 +2182,7 @@ static float rate_estimate_qscale( x264_t *h )
              rcc->last_satd = x264_rc_analyse_slice( h );
              rcc->short_term_cplxsum *= 0.5;
              rcc->short_term_cplxcount *= 0.5;
-            rcc->short_term_cplxsum += rcc->last_satd;
+            rcc->short_term_cplxsum += rcc->last_satd / (CLIP_DURATION(h->fenc->f_duration) / BASE_FRAME_DURATION);
              rcc->short_term_cplxcount ++;
  
              rce.tex_bits = rcc->last_satd;
@@ -2541,10 +2539,11 @@ static int init_pass2( x264_t *h )
  {
      x264_ratecontrol_t *rcc = h->rc;
      uint64_t all_const_bits = 0;
+    double timescale = (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
      double duration = 0;
      for( int i = 0; i < rcc->num_entries; i++ )
          duration += rcc->entry[i].i_duration;
-    duration *= (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
+    duration *= timescale;
      uint64_t all_available_bits = h->param.rc.i_bitrate * 1000. * duration;
      double rate_factor, step_mult;
      double qblur = h->param.rc.f_qblur;
@@ -2583,21 +2582,23 @@ static int init_pass2( x264_t *h )
          for( int j = 1; j < cplxblur*2 && j < rcc->num_entries-i; j++ )
          {
              ratecontrol_entry_t *rcj = &rcc->entry[i+j];
+            double frame_duration = CLIP_DURATION(rcj->i_duration * timescale) / BASE_FRAME_DURATION;
              weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
              if( weight < .0001 )
                  break;
              gaussian_weight = weight * exp( -j*j/200.0 );
              weight_sum += gaussian_weight;
-            cplx_sum += gaussian_weight * (qscale2bits(rcj, 1) - rcj->misc_bits);
+            cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits) / frame_duration;
          }
          /* weighted average of cplx of past frames */
          weight = 1.0;
          for( int j = 0; j <= cplxblur*2 && j <= i; j++ )
          {
              ratecontrol_entry_t *rcj = &rcc->entry[i-j];
+            double frame_duration = CLIP_DURATION(rcj->i_duration * timescale) / BASE_FRAME_DURATION;
              gaussian_weight = weight * exp( -j*j/200.0 );
              weight_sum += gaussian_weight;
-            cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits);
+            cplx_sum += gaussian_weight * (qscale2bits( rcj, 1 ) - rcj->misc_bits) / frame_duration;
              weight *= 1 - pow( (float)rcj->i_count / rcc->nmb, 2 );
              if( weight < .0001 )
                  break;
diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h

index 03c82cb0dc58f1786abf4190a6890188d242a31f..28e6a3d7384588dbcd4424503762d7e19bc25bbb 100644 (file)
--- a/encoder/ratecontrol.h
+++ b/encoder/ratecontrol.h
@@ -27,6 +27,16 @@
  #ifndef X264_RATECONTROL_H
  #define X264_RATECONTROL_H
  
+/* Completely arbitrary.  Ratecontrol lowers relative quality at higher framerates
+ * and the reverse at lower framerates; this serves as the center of the curve. */
+#define BASE_FRAME_DURATION (0.04f)
+
+/* Arbitrary limitations as a sanity check. */
+#define MAX_FRAME_DURATION 1.00f
+#define MIN_FRAME_DURATION 0.01f
+
+#define CLIP_DURATION(f) x264_clip3f(f,MIN_FRAME_DURATION,MAX_FRAME_DURATION)
+
  int  x264_ratecontrol_new   ( x264_t * );
  void x264_ratecontrol_delete( x264_t * );
  
diff --git a/encoder/slicetype.c b/encoder/slicetype.c

index cc0193bb684e02111a334461a9305724366bff3e..97cf61e3e7a3b6677087e64deeda72cf760d3874 100644 (file)
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -748,9 +748,10 @@ static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_frame_t **fram
      return i_score;
  }
  
-static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, int ref0_distance )
+static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance )
  {
-    x264_emms();
+    int fps_factor_intra     = round( CLIP_DURATION(frame->f_duration) / BASE_FRAME_DURATION * 256 );
+    int fps_factor_propagate = round( CLIP_DURATION( average_duration) / BASE_FRAME_DURATION * 256 );
      float weightdelta = 0.0;
      if( ref0_distance && frame->f_weighted_cost_delta[ref0_distance-1] > 0 )
          weightdelta = (1.0 - frame->f_weighted_cost_delta[ref0_distance-1]);
@@ -760,17 +761,18 @@ static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, int ref
      float strength = 5.0f * (1.0f - h->param.rc.f_qcompress);
      for( int mb_index = 0; mb_index < h->mb.i_mb_count; mb_index++ )
      {
-        int intra_cost = (frame->i_intra_cost[mb_index] * frame->i_inv_qscale_factor[mb_index]+128)>>8;
+        int intra_cost = (frame->i_intra_cost[mb_index] * frame->i_inv_qscale_factor[mb_index] + 128) >> 8;
+        int intra_cost_scaled = (intra_cost * fps_factor_intra + 128) >> 8;
          if( intra_cost )
          {
-            int propagate_cost = frame->i_propagate_cost[mb_index];
-            float log2_ratio = x264_log2(intra_cost + propagate_cost) - x264_log2(intra_cost) + weightdelta;
+            int propagate_cost = (frame->i_propagate_cost[mb_index] * fps_factor_propagate + 128) >> 8;
+            float log2_ratio = x264_log2(intra_cost_scaled + propagate_cost) - x264_log2(intra_cost) + weightdelta;
              frame->f_qp_offset[mb_index] = frame->f_qp_offset_aq[mb_index] - strength * log2_ratio;
          }
      }
  }
  
-static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b, int referenced )
+static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, float average_duration, int p0, int p1, int b, int referenced )
  {
      uint16_t *ref_costs[2] = {frames[p0]->i_propagate_cost,frames[p1]->i_propagate_cost};
      int dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
@@ -780,6 +782,9 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
      int *buf = h->scratch_buffer;
      uint16_t *propagate_cost = frames[b]->i_propagate_cost;
  
+    x264_emms();
+    float fps_factor = CLIP_DURATION(frames[b]->f_duration) / CLIP_DURATION(average_duration);
+
      /* For non-reffed frames the source costs are always zero, so just memset one row and re-use it. */
      if( !referenced )
          memset( frames[b]->i_propagate_cost, 0, h->mb.i_mb_width * sizeof(uint16_t) );
@@ -789,7 +794,7 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
          int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride;
          h->mc.mbtree_propagate_cost( buf, propagate_cost,
              frames[b]->i_intra_cost+mb_index, frames[b]->lowres_costs[b-p0][p1-b]+mb_index,
-            frames[b]->i_inv_qscale_factor+mb_index, h->mb.i_mb_width );
+            frames[b]->i_inv_qscale_factor+mb_index, &fps_factor, h->mb.i_mb_width );
          if( referenced )
              propagate_cost += h->mb.i_mb_width;
          for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->mb.i_mb_width; h->mb.i_mb_x++, mb_index++ )
@@ -858,7 +863,7 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
      }
  
      if( h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead && referenced )
-        x264_macroblock_tree_finish( h, frames[b], b == p1 ? b - p0 : 0 );
+        x264_macroblock_tree_finish( h, frames[b], average_duration, b == p1 ? b - p0 : 0 );
  }
  
  static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra )
@@ -866,6 +871,13 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
      int idx = !b_intra;
      int last_nonb, cur_nonb = 1;
      int bframes = 0;
+
+    x264_emms();
+    float total_duration = 0.0;
+    for( int j = 0; j <= num_frames; j++ )
+        total_duration += frames[j]->f_duration;
+    float average_duration = total_duration / (num_frames + 1);
+
      int i = num_frames;
  
      if( b_intra )
@@ -918,34 +930,34 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
                  if( i != middle )
                  {
                      x264_slicetype_frame_cost( h, a, frames, p0, p1, i, 0 );
-                    x264_macroblock_tree_propagate( h, frames, p0, p1, i, 0 );
+                    x264_macroblock_tree_propagate( h, frames, average_duration, p0, p1, i, 0 );
                  }
                  i--;
              }
-            x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, middle, 1 );
+            x264_macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, middle, 1 );
          }
          else
          {
              while( i > cur_nonb )
              {
                  x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 );
-                x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i, 0 );
+                x264_macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, i, 0 );
                  i--;
              }
          }
-        x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb, 1 );
+        x264_macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, last_nonb, 1 );
          last_nonb = cur_nonb;
      }
  
      if( !h->param.rc.i_lookahead )
      {
-        x264_macroblock_tree_propagate( h, frames, 0, last_nonb, last_nonb, 1 );
+        x264_macroblock_tree_propagate( h, frames, average_duration, 0, last_nonb, last_nonb, 1 );
          XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
      }
  
-    x264_macroblock_tree_finish( h, frames[last_nonb], last_nonb );
+    x264_macroblock_tree_finish( h, frames[last_nonb], average_duration, last_nonb );
      if( h->param.i_bframe_pyramid && bframes > 1 && !h->param.rc.i_vbv_buffer_size )
-        x264_macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], 0 );
+        x264_macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], average_duration, 0 );
  }
  
  static int x264_vbv_frame_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1, int b )
diff --git a/tools/checkasm.c b/tools/checkasm.c

index c552ab9b595eb6e247928cbecfc2526a1f7d196c..7c7faa72305ca60711c64df742bccb2f6549242b 100644 (file)
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -1236,29 +1236,34 @@ static int check_mc( int cpu_ref, int cpu_new )
  
      if( mc_a.mbtree_propagate_cost != mc_ref.mbtree_propagate_cost )
      {
-        ok = 1; used_asm = 1;
-        set_func_name( "mbtree_propagate" );
-        int *dsta = (int*)buf3;
-        int *dstc = dsta+400;
-        uint16_t *prop = (uint16_t*)buf1;
-        uint16_t *intra = (uint16_t*)buf4;
-        uint16_t *inter = intra+400;
-        uint16_t *qscale = inter+400;
-        uint16_t *rnd = (uint16_t*)buf2;
          x264_emms();
-        for( int i = 0; i < 400; i++ )
+        for( int i = 0; i < 10; i++ )
          {
-            intra[i]  = *rnd++ & 0x7fff;
-            intra[i] += !intra[i];
-            inter[i]  = *rnd++ & 0x7fff;
-            qscale[i] = *rnd++ & 0x7fff;
+            float fps_factor = (rand()&65535) / 256.;
+            ok = 1; used_asm = 1;
+            set_func_name( "mbtree_propagate" );
+            int *dsta = (int*)buf3;
+            int *dstc = dsta+400;
+            uint16_t *prop = (uint16_t*)buf1;
+            uint16_t *intra = (uint16_t*)buf4;
+            uint16_t *inter = intra+100;
+            uint16_t *qscale = inter+100;
+            uint16_t *rnd = (uint16_t*)buf2;
+            x264_emms();
+            for( int j = 0; j < 100; j++ )
+            {
+                intra[j]  = *rnd++ & 0x7fff;
+                intra[j] += !intra[j];
+                inter[j]  = *rnd++ & 0x7fff;
+                qscale[j] = *rnd++ & 0x7fff;
+            }
+            call_c( mc_c.mbtree_propagate_cost, dstc, prop, intra, inter, qscale, &fps_factor, 100 );
+            call_a( mc_a.mbtree_propagate_cost, dsta, prop, intra, inter, qscale, &fps_factor, 100 );
+            // I don't care about exact rounding, this is just how close the floating-point implementation happens to be
+            x264_emms();
+            for( int j = 0; j < 100; j++ )
+                ok &= abs( dstc[j]-dsta[j] ) <= 1 || fabs( (double)dstc[j]/dsta[j]-1 ) < 1e-4;
          }
-        call_c( mc_c.mbtree_propagate_cost, dstc, prop, intra, inter, qscale, 400 );
-        call_a( mc_a.mbtree_propagate_cost, dsta, prop, intra, inter, qscale, 400 );
-        // I don't care about exact rounding, this is just how close the floating-point implementation happens to be
-        x264_emms();
-        for( int i = 0; i < 400; i++ )
-            ok &= abs( dstc[i]-dsta[i] ) <= 1 || fabs( (double)dstc[i]/dsta[i]-1 ) < 1e-6;
          report( "mbtree propagate :" );
      }
author	Fiona Glaser <fiona@x264.com>
	Fri, 24 Dec 2010 00:33:01 +0000 (19:33 -0500)
committer	Fiona Glaser <fiona@x264.com>
	Mon, 10 Jan 2011 20:28:08 +0000 (12:28 -0800)
common/mc.c		patch | blob | history
common/mc.h		patch | blob | history
common/x86/const-a.asm		patch | blob | history
common/x86/mc-a2.asm		patch | blob | history
common/x86/mc-c.c		patch | blob | history
encoder/ratecontrol.c		patch | blob | history
encoder/ratecontrol.h		patch | blob | history
encoder/slicetype.c		patch | blob | history
tools/checkasm.c		patch | blob | history