//FIXME pass the copy cleanly ?
// memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
- ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
+ ff_spatial_dwt(buffer, s->temp_dwt_buffer, width, height, stride, type, s->spatial_decomposition_count);
for(level=0; level<s->spatial_decomposition_count; level++){
for(orientation=level ? 1 : 0; orientation<4; orientation++){
for(xs= 0; xs<Q2_STEP; xs++){
memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
dequantize_all(s, p, idwt2_buffer, width, height);
- ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
+ ff_spatial_idwt(idwt2_buffer, s->temp_idwt_buffer, width, height, stride, type, s->spatial_decomposition_count);
find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
for(y=ys; y<b->height; y+= Q2_STEP){
}
}
dequantize_all(s, p, idwt2_buffer, width, height);
- ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
+ ff_spatial_idwt(idwt2_buffer, s->temp_idwt_buffer, width, height, stride, type, s->spatial_decomposition_count);
find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
for(y=ys; y<b->height; y+= Q2_STEP){
for(x=xs; x<b->width; x+= Q2_STEP){
avctx->coded_frame= &s->current_picture;
switch(avctx->pix_fmt){
-// case PIX_FMT_YUV444P:
+ case PIX_FMT_YUV444P:
// case PIX_FMT_YUV422P:
case PIX_FMT_YUV420P:
// case PIX_FMT_GRAY8:
// case PIX_FMT_YUV411P:
-// case PIX_FMT_YUV410P:
+ case PIX_FMT_YUV410P:
s->colorspace_type= 0;
break;
/* case PIX_FMT_RGB32:
av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
return -1;
}
-// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
- s->chroma_h_shift= 1;
- s->chroma_v_shift= 1;
+ avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
}
//near copy & paste from dsputil, FIXME
-static int pix_sum(uint8_t * pix, int line_size, int w)
+static int pix_sum(uint8_t * pix, int line_size, int w, int h)
{
int s, i, j;
s = 0;
- for (i = 0; i < w; i++) {
+ for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
s += pix[0];
pix ++;
const int stride= s->current_picture.linesize[0];
const int uvstride= s->current_picture.linesize[1];
uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
- s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
- s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
+ s->input_picture.data[1] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift),
+ s->input_picture.data[2] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift)};
int P[10][2];
int16_t last_mv[3][2];
int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
block_s= block_w*block_w;
- sum = pix_sum(current_data[0], stride, block_w);
+ sum = pix_sum(current_data[0], stride, block_w, block_w);
l= (sum + block_s/2)/block_s;
iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
- block_s= block_w*block_w>>2;
- sum = pix_sum(current_data[1], uvstride, block_w>>1);
+ block_s= block_w*block_w>>(s->chroma_h_shift + s->chroma_v_shift);
+ sum = pix_sum(current_data[1], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
cb= (sum + block_s/2)/block_s;
// iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
- sum = pix_sum(current_data[2], uvstride, block_w>>1);
+ sum = pix_sum(current_data[2], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
cr= (sum + block_s/2)/block_s;
// iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
int i, x2, y2;
Plane *p= &s->plane[plane_index];
const int block_size = MB_SIZE >> s->block_max_depth;
- const int block_w = plane_index ? block_size/2 : block_size;
- const uint8_t *obmc = plane_index ? ff_obmc_tab[s->block_max_depth+1] : ff_obmc_tab[s->block_max_depth];
- const int obmc_stride= plane_index ? block_size : 2*block_size;
+ const int block_w = plane_index ? block_size>>s->chroma_h_shift : block_size;
+ const int block_h = plane_index ? block_size>>s->chroma_v_shift : block_size;
+ const uint8_t *obmc = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
+ const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
const int ref_stride= s->current_picture.linesize[plane_index];
uint8_t *src= s-> input_picture.data[plane_index];
IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
int ab=0;
int aa=0;
+ av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc stuff above
+
b->type|= BLOCK_INTRA;
b->color[plane_index]= 0;
memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
int mb_x2= mb_x + (i &1) - 1;
int mb_y2= mb_y + (i>>1) - 1;
int x= block_w*mb_x2 + block_w/2;
- int y= block_w*mb_y2 + block_w/2;
+ int y= block_h*mb_y2 + block_h/2;
- add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
- x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
+ add_yblock(s, 0, NULL, dst + (i&1)*block_w + (i>>1)*obmc_stride*block_h, NULL, obmc,
+ x, y, block_w, block_h, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
- for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
+ for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_h); y2++){
for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
- int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
+ int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_h*mb_y - block_h/2))*obmc_stride;
int obmc_v= obmc[index];
int d;
- if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
+ if(y<0) obmc_v += obmc[index + block_h*obmc_stride];
if(x<0) obmc_v += obmc[index + block_w];
- if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
+ if(y+block_h>h) obmc_v += obmc[index - block_h*obmc_stride];
if(x+block_w>w) obmc_v += obmc[index - block_w];
//FIXME precalculate this or simplify it somehow else
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
Plane *p= &s->plane[plane_index];
const int block_size = MB_SIZE >> s->block_max_depth;
- const int block_w = plane_index ? block_size/2 : block_size;
- const int obmc_stride= plane_index ? block_size : 2*block_size;
+ const int block_w = plane_index ? block_size>>s->chroma_h_shift : block_size;
+ const int block_h = plane_index ? block_size>>s->chroma_v_shift : block_size;
+ const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
const int ref_stride= s->current_picture.linesize[plane_index];
uint8_t *dst= s->current_picture.data[plane_index];
uint8_t *src= s-> input_picture.data[plane_index];
int rate= 0;
const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
int sx= block_w*mb_x - block_w/2;
- int sy= block_w*mb_y - block_w/2;
+ int sy= block_h*mb_y - block_h/2;
int x0= FFMAX(0,-sx);
int y0= FFMAX(0,-sy);
int x1= FFMIN(block_w*2, w-sx);
- int y1= FFMIN(block_w*2, h-sy);
+ int y1= FFMIN(block_h*2, h-sy);
int i,x,y;
- ff_snow_pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
+ av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumtions below chckinhg only block_w
+
+ ff_snow_pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_h*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
for(y=y0; y<y1; y++){
const uint8_t *obmc1= obmc_edged + y*obmc_stride;
else
x0 = block_w;
if(mb_y == 0)
- y1 = block_w;
+ y1 = block_h;
else
- y0 = block_w;
+ y0 = block_h;
for(y=y0; y<y1; y++)
memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
}
int i, y2;
Plane *p= &s->plane[plane_index];
const int block_size = MB_SIZE >> s->block_max_depth;
- const int block_w = plane_index ? block_size/2 : block_size;
- const uint8_t *obmc = plane_index ? ff_obmc_tab[s->block_max_depth+1] : ff_obmc_tab[s->block_max_depth];
- const int obmc_stride= plane_index ? block_size : 2*block_size;
+ const int block_w = plane_index ? block_size>>s->chroma_h_shift : block_size;
+ const int block_h = plane_index ? block_size>>s->chroma_v_shift : block_size;
+ const uint8_t *obmc = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
+ const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
const int ref_stride= s->current_picture.linesize[plane_index];
uint8_t *dst= s->current_picture.data[plane_index];
uint8_t *src= s-> input_picture.data[plane_index];
int rate= 0;
const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
+ av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumtions below
+
for(i=0; i<9; i++){
int mb_x2= mb_x + (i%3) - 1;
int mb_y2= mb_y + (i/3) - 1;
int x= block_w*mb_x2 + block_w/2;
- int y= block_w*mb_y2 + block_w/2;
+ int y= block_h*mb_y2 + block_h/2;
add_yblock(s, 0, NULL, zero_dst, dst, obmc,
- x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
+ x, y, block_w, block_h, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
//FIXME find a cleaner/simpler way to skip the outside stuff
for(y2= y; y2<0; y2++)
memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
- for(y2= h; y2<y+block_w; y2++)
+ for(y2= h; y2<y+block_h; y2++)
memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
if(x<0){
- for(y2= y; y2<y+block_w; y2++)
+ for(y2= y; y2<y+block_h; y2++)
memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
}
if(x+block_w > w){
- for(y2= y; y2<y+block_w; y2++)
+ for(y2= y; y2<y+block_h; y2++)
memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
}
assert(block_w== 8 || block_w==16);
- distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
+ distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_h);
}
if(plane_index==0){
uint8_t *dst= s->current_picture.data[0];
const int stride= s->current_picture.linesize[0];
const int block_w= MB_SIZE >> s->block_max_depth;
+ const int block_h= MB_SIZE >> s->block_max_depth;
const int sx= block_w*mb_x - block_w/2;
- const int sy= block_w*mb_y - block_w/2;
+ const int sy= block_h*mb_y - block_h/2;
const int w= s->plane[0].width;
const int h= s->plane[0].height;
int y;
for(y=sy; y<0; y++)
memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
- for(y=h; y<sy+block_w*2; y++)
+ for(y=h; y<sy+block_h*2; y++)
memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
if(sx<0){
- for(y=sy; y<sy+block_w*2; y++)
+ for(y=sy; y<sy+block_h*2; y++)
memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
}
if(sx+block_w*2 > w){
- for(y=sy; y<sy+block_w*2; y++)
+ for(y=sy; y<sy+block_h*2; y++)
memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
}
}
memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
- ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
+ ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
for(y=0; y<height; y++){
for(x=0; x<width; x++){
int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
for(i=0; i<3; i++){
- int shift= !!i;
- for(y=0; y<(height>>shift); y++)
+ int hshift= i ? s->chroma_h_shift : 0;
+ int vshift= i ? s->chroma_v_shift : 0;
+ for(y=0; y<(height>>vshift); y++)
memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
&pict->data[i][y * pict->linesize[i]],
- width>>shift);
+ width>>hshift);
}
s->new_picture = *pict;
else
s->spatial_decomposition_count= 5;
+ while( !(width >>(s->chroma_h_shift + s->spatial_decomposition_count))
+ || !(height>>(s->chroma_v_shift + s->spatial_decomposition_count)))
+ s->spatial_decomposition_count--;
+
s->m.pict_type = pic->pict_type;
s->qbias = pic->pict_type == AV_PICTURE_TYPE_P ? 2 : 0;
/* if(QUANTIZE2)
dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
else*/
- ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
+ ff_spatial_dwt(s->spatial_dwt_buffer, s->temp_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
if(s->pass1_rc && plane_index==0){
int delta_qlog = ratecontrol_1pass(s, pic);
}
}
- ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
+ ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
if(s->qlog == LOSSLESS_QLOG){
for(y=0; y<h; y++){
for(x=0; x<w; x++){
s.spatial_decomposition_count=6;
s.spatial_decomposition_type=1;
+ s.temp_dwt_buffer = av_mallocz(width * sizeof(DWTELEM));
+ s.temp_idwt_buffer = av_mallocz(width * sizeof(IDWTELEM));
+
av_lfg_init(&prng, 1);
printf("testing 5/3 DWT\n");
for(i=0; i<width*height; i++)
buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
- ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
- ff_spatial_idwt((IDWTELEM*)buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+ ff_spatial_dwt(buffer[0], s.temp_dwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+ ff_spatial_idwt((IDWTELEM*)buffer[0], s.temp_idwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
for(i=0; i<width*height; i++)
if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
for(i=0; i<width*height; i++)
buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
- ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
- ff_spatial_idwt((IDWTELEM*)buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+ ff_spatial_dwt(buffer[0], s.temp_dwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+ ff_spatial_idwt((IDWTELEM*)buffer[0], s.temp_idwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
for(i=0; i<width*height; i++)
if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
memset(buffer[0], 0, sizeof(int)*width*height);
buf[w/2 + h/2*stride]= 256*256;
- ff_spatial_idwt((IDWTELEM*)buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+ ff_spatial_idwt((IDWTELEM*)buffer[0], s.temp_idwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
for(y=0; y<height; y++){
for(x=0; x<width; x++){
int64_t d= buffer[0][x + y*width];
buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
}
}
- ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+ ff_spatial_dwt(buffer[0], s.temp_dwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
for(y=0; y<height; y++){
for(x=0; x<width; x++){
int64_t d= buffer[0][x + y*width];