+/* Set up the buffers and filter tables used to read and rescale
+ * macroblock-tree data.
+ *
+ * rc->mbtree.srcdim and the h->param frame size are both divided by 16 to
+ * obtain per-axis cell counts.  The QP buffer(s) are always allocated; when
+ * the two integer grids differ, the intermediate scaling buffers and, for
+ * each axis, a table of filter start positions and normalized weights are
+ * allocated and filled as well.
+ *
+ * Returns 0 on success (including the case where no rescaling is needed) and
+ * -1 through the fail label.  CHECKED_MALLOC is defined elsewhere and is
+ * presumably what jumps to that label on allocation failure -- confirm. */
+static int x264_macroblock_tree_rescale_init( x264_t *h, x264_ratecontrol_t *rc )
+{
+    /* Fractional cell counts are kept so that edge padding is accounted for */
+    float src_cells[2] = { rc->mbtree.srcdim[0] / 16.f, rc->mbtree.srcdim[1] / 16.f };
+    float dst_cells[2] = { h->param.i_width / 16.f, h->param.i_height / 16.f };
+    /* Integer array dimensions: fractional counts rounded up */
+    int src_n[2] = { ceil( src_cells[0] ), ceil( src_cells[1] ) };
+    int dst_n[2] = { ceil( dst_cells[0] ), ceil( dst_cells[1] ) };
+    if( PARAM_INTERLACED )
+    {
+        /* Round both heights up to an even number of rows */
+        src_n[1] = (src_n[1]+1)&~1;
+        dst_n[1] = (dst_n[1]+1)&~1;
+    }
+
+    rc->mbtree.src_mb_count = src_n[0] * src_n[1];
+
+    /* The second QP buffer is only allocated when B-pyramid is enabled and
+     * stats are being read */
+    CHECKED_MALLOC( rc->mbtree.qp_buffer[0], rc->mbtree.src_mb_count * sizeof(uint16_t) );
+    if( h->param.i_bframe_pyramid && h->param.rc.b_stat_read )
+        CHECKED_MALLOC( rc->mbtree.qp_buffer[1], rc->mbtree.src_mb_count * sizeof(uint16_t) );
+    rc->mbtree.qpbuf_pos = -1;
+
+    /* Identical grids: nothing to rescale, the QP buffers are sufficient */
+    int same_grid = src_n[0] == dst_n[0] && src_n[1] == dst_n[1];
+    if( same_grid )
+        return 0;
+
+    rc->mbtree.rescale_enabled = 1;
+
+    /* Intermediate buffers: [0] holds the source grid, [1] holds the grid
+     * after the horizontal pass (destination width, source height) */
+    CHECKED_MALLOC( rc->mbtree.scale_buffer[0], src_n[0] * src_n[1] * sizeof(float) );
+    CHECKED_MALLOC( rc->mbtree.scale_buffer[1], dst_n[0] * src_n[1] * sizeof(float) );
+
+    /* Build one filter table per axis (0 = horizontal, 1 = vertical) */
+    for( int axis = 0; axis < 2; axis++ )
+    {
+        /* Downscaling needs a tap count that grows with the ratio;
+         * upscaling (or equal size on this axis) always uses 3 taps */
+        rc->mbtree.filtersize[axis] = src_cells[axis] > dst_cells[axis]
+                                    ? 1 + (2 * src_n[axis] + dst_n[axis] - 1) / dst_n[axis]
+                                    : 3;
+
+        CHECKED_MALLOC( rc->mbtree.coeffs[axis], rc->mbtree.filtersize[axis] * dst_n[axis] * sizeof(float) );
+        CHECKED_MALLOC( rc->mbtree.pos[axis], dst_n[axis] * sizeof(int) );
+
+        /* step:       source cells advanced per destination cell
+         * dist_scale: shrinks distances when downscaling so the weight
+         *             function widens accordingly
+         * center:     position of the current destination cell's centre,
+         *             expressed in source coordinates */
+        float step = src_cells[axis] / dst_cells[axis];
+        float dist_scale = step > 1.f ? dst_cells[axis] / src_cells[axis] : 1.f;
+        float center = 0.5f * step - 0.5f;
+        int taps = rc->mbtree.filtersize[axis];
+        float *weights = rc->mbtree.coeffs[axis];
+        for( int j = 0; j < dst_n[axis]; j++, weights += taps, center += step )
+        {
+            /* First source index covered by this destination cell's window */
+            int first = center - (taps - 2.f) * 0.5f;
+            float total = 0.0;
+            rc->mbtree.pos[axis][j] = first;
+            /* Weight falls off linearly with distance and is floored at 0 */
+            for( int k = 0; k < taps; k++ )
+            {
+                float dist = fabs( first + k - center ) * dist_scale;
+                float weight = X264_MAX( 1.f - dist, 0 );
+                weights[k] = weight;
+                total += weight;
+            }
+            /* Normalize so the taps of this cell sum to one */
+            total = 1.0f / total;
+            for( int k = 0; k < taps; k++ )
+                weights[k] *= total;
+        }
+    }
+
+    /* From here on srcdim holds the integer QP array dimensions */
+    rc->mbtree.srcdim[0] = src_n[0];
+    rc->mbtree.srcdim[1] = src_n[1];
+    return 0;
+fail:
+    return -1;
+}
+
+/* Release the per-slot mbtree buffers held in rc->mbtree: the QP buffers,
+ * the intermediate scaling buffers, and the per-axis filter weight and
+ * position tables.  Both slots of every array are passed to x264_free. */
+static void x264_macroblock_tree_rescale_destroy( x264_ratecontrol_t *rc )
+{
+    for( int slot = 0; slot < 2; slot++ )
+    {
+        /* Input side: QP data and scaling scratch space */
+        x264_free( rc->mbtree.qp_buffer[slot] );
+        x264_free( rc->mbtree.scale_buffer[slot] );
+        /* Filter tables for this axis */
+        x264_free( rc->mbtree.coeffs[slot] );
+        x264_free( rc->mbtree.pos[slot] );
+    }
+}
+
+/* Apply one filtersize-tap weighted sum.
+ *
+ * Taps read src at element indices pos, pos+1, ... (each multiplied by
+ * stride) and weight them with coeff[0], coeff[1], ...  Every index is passed
+ * through x264_clip3( idx, 0, max-1 ) before use; that helper is defined
+ * elsewhere and presumably clamps to the given range -- confirm.
+ * Returns the accumulated sum. */
+static ALWAYS_INLINE float tapfilter( float *src, int pos, int max, int stride, float *coeff, int filtersize )
+{
+    const int last = max-1;
+    float acc = 0.f;
+    for( int tap = 0; tap < filtersize; tap++ )
+        acc += src[x264_clip3( pos + tap, 0, last )*stride] * coeff[tap];
+    return acc;
+}
+
+/* Rescale the grid held in rc->mbtree.scale_buffer[0] into dst using the
+ * per-axis filter tables in rc->mbtree (pos, coeffs, filtersize).
+ *
+ * The source grid is srcdim[0] wide and srcdim[1] high.  A horizontal pass
+ * writes a grid of h->mb.i_mb_width by srcdim[1] into scale_buffer[1]; a
+ * vertical pass then writes h->mb.i_mb_width by h->mb.i_mb_height values into
+ * dst, with a row stride of h->mb.i_mb_width.  scale_buffer[0] is expected to
+ * have been filled by the caller beforehand. */
+static void x264_macroblock_tree_rescale( x264_t *h, x264_ratecontrol_t *rc, float *dst )
+{
+    const int src_w = rc->mbtree.srcdim[0];
+    const int src_h = rc->mbtree.srcdim[1];
+    const int dst_w = h->mb.i_mb_width;
+    const int dst_h = h->mb.i_mb_height;
+
+    /* Horizontal pass: every source row is resampled to the output width */
+    const int h_taps = rc->mbtree.filtersize[0];
+    float *src_row = rc->mbtree.scale_buffer[0];
+    float *mid_row = rc->mbtree.scale_buffer[1];
+    for( int row = 0; row < src_h; row++ )
+    {
+        for( int col = 0; col < dst_w; col++ )
+            mid_row[col] = tapfilter( src_row, rc->mbtree.pos[0][col], src_w, 1,
+                                      rc->mbtree.coeffs[0] + col * h_taps, h_taps );
+        src_row += src_w;
+        mid_row += dst_w;
+    }
+
+    /* Vertical pass: every column of the intermediate grid is resampled to
+     * the output height, stepping through memory one output row at a time */
+    const int v_taps = rc->mbtree.filtersize[1];
+    for( int col = 0; col < dst_w; col++ )
+    {
+        float *mid_col = rc->mbtree.scale_buffer[1] + col;
+        float *dst_col = dst + col;
+        for( int row = 0; row < dst_h; row++ )
+            dst_col[row*dst_w] = tapfilter( mid_col, rc->mbtree.pos[1][row], src_h, dst_w,
+                                            rc->mbtree.coeffs[1] + row * v_taps, v_taps );
+    }
+}
+