X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fsnow.c;h=b5a381024cb732e69ce0a61b3c4993bef230550f;hb=b6be4acc1fbe78bf3708487465a7211bb1cfb280;hp=2b328b80c98fa1a2c6fb4f780858003165d954c2;hpb=96e2fbf27b36598aef084225d380335f27eb6598;p=ffmpeg diff --git a/libavcodec/snow.c b/libavcodec/snow.c index 2b328b80c98..b5a381024cb 100644 --- a/libavcodec/snow.c +++ b/libavcodec/snow.c @@ -406,7 +406,7 @@ typedef struct SubBand{ int stride; int width; int height; - int qlog; ///< log(qscale)/log[2^(1/6)] + int qlog; ///< log(qscale)/log[2^(1/6)] DWTELEM *buf; IDWTELEM *ibuf; int buf_x_offset; @@ -433,7 +433,7 @@ typedef struct Plane{ }Plane; typedef struct SnowContext{ -// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) +// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) AVCodecContext *avctx; RangeCoder c; @@ -487,7 +487,7 @@ typedef struct SnowContext{ int me_cache_generation; slice_buffer sb; - MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) + MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) }SnowContext; typedef struct { @@ -514,8 +514,7 @@ static void slice_buffer_init(slice_buffer * buf, int line_count, int max_alloca buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count); buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines); - for (i = 0; i < max_allocated_lines; i++) - { + for(i = 0; i < max_allocated_lines; i++){ buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width); } @@ -527,8 +526,6 @@ static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) int offset; IDWTELEM * buffer; -// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line); - assert(buf->data_stack_top >= 0); // assert(!buf->line[line]); if (buf->line[line]) @@ -539,8 +536,6 @@ static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) buf->data_stack_top--; buf->line[line] = buffer; -// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1); - return buffer; } @@ -557,20 +552,14 @@ static void slice_buffer_release(slice_buffer * buf, int line) buf->data_stack_top++; buf->data_stack[buf->data_stack_top] = buffer; buf->line[line] = NULL; - -// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1); } static void slice_buffer_flush(slice_buffer * buf) { int i; - for (i = 0; i < buf->line_count; i++) - { + for(i = 0; i < buf->line_count; i++){ if (buf->line[i]) - { -// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i); slice_buffer_release(buf, i); - } } } @@ -579,8 +568,7 @@ static void slice_buffer_destroy(slice_buffer * buf) int i; slice_buffer_flush(buf); - for (i = buf->data_count - 1; i >= 0; i--) - { + for(i = buf->data_count - 1; i >= 0; i--){ av_freep(&buf->data_stack[i]); } av_freep(&buf->data_stack); @@ -856,7 +844,7 @@ inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); } } -#endif +#endif /* ! liftS */ static void horizontal_decompose53i(DWTELEM *b, int width){ DWTELEM temp[width]; @@ -904,7 +892,7 @@ static void horizontal_decompose53i(DWTELEM *b, int width){ #else lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); -#endif +#endif /* 0 */ } static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ @@ -932,15 +920,11 @@ static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int str DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; -{START_TIMER if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); -STOP_TIMER("horizontal_decompose53i")} -{START_TIMER if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); -STOP_TIMER("vertical_decompose53i*")} b0=b2; b1=b3; @@ -1005,23 +989,14 @@ static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int str DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; -{START_TIMER if(y+3<(unsigned)height) horizontal_decompose97i(b4, width); if(y+4<(unsigned)height) horizontal_decompose97i(b5, width); -if(width>400){ -STOP_TIMER("horizontal_decompose97i") -}} -{START_TIMER if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width); if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width); -if(width>400){ -STOP_TIMER("vertical_decompose97i") -}} - b0=b2; b1=b3; b2=b4; @@ -1123,15 +1098,11 @@ static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); -{START_TIMER if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); -STOP_TIMER("vertical_compose53i*")} -{START_TIMER if(y-1<(unsigned)height) horizontal_compose53i(b0, width); if(y+0<(unsigned)height) horizontal_compose53i(b1, width); -STOP_TIMER("horizontal_compose53i")} cs->b0 = b2; cs->b1 = b3; @@ -1145,22 +1116,18 @@ static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; -{START_TIMER if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); -STOP_TIMER("vertical_compose53i*")} -{START_TIMER if(y-1<(unsigned)height) horizontal_compose53i(b0, width); if(y+0<(unsigned)height) horizontal_compose53i(b1, width); -STOP_TIMER("horizontal_compose53i")} cs->b0 = b2; cs->b1 = b3; cs->y += 2; } -static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){ +static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){ dwt_compose_t cs; spatial_compose53i_init(&cs, buffer, height, stride); while(cs.y <= height) @@ -1255,7 +1222,6 @@ static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, s IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); -{START_TIMER if(y>0 && y+4vertical_compose97i(b0, b1, b2, b3, b4, b5, width); }else{ @@ -1264,14 +1230,9 @@ static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, s if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); } -if(width>400){ -STOP_TIMER("vertical_compose97i")}} -{START_TIMER if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); -if(width>400 && y+0<(unsigned)height){ -STOP_TIMER("horizontal_compose97i")}} cs->b0=b2; cs->b1=b3; @@ -1289,19 +1250,13 @@ static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; -{START_TIMER if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); -if(width>400){ -STOP_TIMER("vertical_compose97i")}} -{START_TIMER if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); -if(width>400 && b0 <= b2){ -STOP_TIMER("horizontal_compose97i")}} cs->b0=b2; cs->b1=b3; @@ -1310,7 +1265,7 @@ STOP_TIMER("horizontal_compose97i")}} cs->y += 2; } -static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){ +static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){ dwt_compose_t cs; spatial_compose97i_init(&cs, buffer, height, stride); while(cs.y <= height) @@ -1346,9 +1301,9 @@ static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ switch(type){ case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<>level, height>>level, stride<>level)+support, height>>level)){ switch(type){ case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<>level, height>>level, stride_line<qbias*qmul)>>QBIAS_SHIFT; int new_index = 0; - START_TIMER - if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ qadd= 0; qmul= 1<width*sizeof(IDWTELEM)); v = b->x_coeff[new_index].coeff; x = b->x_coeff[new_index++].x; - while(x < w) - { + while(x < w){ register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; register int u= -(v&1); line[x] = (t^u) - u; @@ -1643,9 +1595,6 @@ static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, sli x = b->x_coeff[new_index++].x; } } - if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){ - STOP_TIMER("decode_subband") - } /* Save our variables for the next slice. */ save_state[0] = new_index; @@ -2193,8 +2142,7 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, i uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; int16_t *tmpI= tmpIt; uint8_t *tmp2= tmp2t[0]; - uint8_t *hpel[11]; -START_TIMER + const uint8_t *hpel[11]; assert(dx<16 && dy<16); r= brane[dx + 16*dy]&15; l= brane[dx + 16*dy]>>4; @@ -2302,10 +2250,10 @@ START_TIMER hpel[10]= hpel[8] + 1; if(b==15){ - uint8_t *src1= hpel[dx/8 + dy/8*4 ]; - uint8_t *src2= hpel[dx/8 + dy/8*4+1]; - uint8_t *src3= hpel[dx/8 + dy/8*4+4]; - uint8_t *src4= hpel[dx/8 + dy/8*4+5]; + const uint8_t *src1= hpel[dx/8 + dy/8*4 ]; + const uint8_t *src2= hpel[dx/8 + dy/8*4+1]; + const uint8_t *src3= hpel[dx/8 + dy/8*4+4]; + const uint8_t *src4= hpel[dx/8 + dy/8*4+5]; dx&=7; dy&=7; for(y=0; y < b_h; y++){ @@ -2320,8 +2268,8 @@ START_TIMER dst +=stride; } }else{ - uint8_t *src1= hpel[l]; - uint8_t *src2= hpel[r]; + const uint8_t *src1= hpel[l]; + const uint8_t *src2= hpel[r]; int a= weight[((dx&7) + (8*(dy&7)))]; int b= 8-a; for(y=0; y < b_h; y++){ @@ -2333,7 +2281,6 @@ START_TIMER dst +=stride; } } -STOP_TIMER("mc_block") } #define mca(dx,dy,b_w)\ @@ -2410,7 +2357,7 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, i // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); // assert(!(b_w&(b_w-1))); assert(b_w>1 && b_h>1); - assert(tab_index>=0 && tab_index<4 || b_w==32); + assert((tab_index>=0 && tab_index<4) || b_w==32); if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy); else if(b_w==32){ @@ -2465,7 +2412,7 @@ void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_ } } -//FIXME name clenup (b_w, block_w, b_width stuff) +//FIXME name cleanup (b_w, block_w, b_width stuff) static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ const int b_width = s->b_width << s->block_max_depth; const int b_height= s->b_height << s->block_max_depth; @@ -2495,7 +2442,7 @@ static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer rb= rt; } - if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 + if(src_x<0){ //FIXME merge with prev & always round internal width up to *16 obmc -= src_x; b_w += src_x; if(!sliced && !offset_dst) @@ -2516,7 +2463,8 @@ static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer if(b_w<=0 || b_h<=0) return; -assert(src_stride > 2*MB_SIZE + 5); + assert(src_stride > 2*MB_SIZE + 5); + if(!sliced && offset_dst) dst += src_x + src_y*dst_stride; dst8+= src_x + src_y*src_stride; @@ -2590,34 +2538,32 @@ assert(src_stride > 2*MB_SIZE + 5); } #else if(sliced){ - START_TIMER - s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); - STOP_TIMER("inner_add_yblock") - }else - for(y=0; y>1); - const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); - const uint8_t *obmc4= obmc3+ (obmc_stride>>1); - for(x=0; x>= 8 - FRAC_BITS; - } - if(add){ - v += dst[x + y*dst_stride]; - v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; - if(v&(~255)) v= ~(v>>31); - dst8[x + y*src_stride] = v; - }else{ - dst[x + y*dst_stride] -= v; + }else{ + for(y=0; y>1); + const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + const uint8_t *obmc4= obmc3+ (obmc_stride>>1); + for(x=0; x>= 8 - FRAC_BITS; + } + if(add){ + v += dst[x + y*dst_stride]; + v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; + if(v&(~255)) v= ~(v>>31); + dst8[x + y*src_stride] = v; + }else{ + dst[x + y*dst_stride] -= v; + } } } } @@ -2637,19 +2583,16 @@ static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer uint8_t *dst8= s->current_picture.data[plane_index]; int w= p->width; int h= p->height; - START_TIMER if(s->keyframe || (s->avctx->debug&512)){ if(mb_y==mb_h) return; if(add){ - for(y=block_w*mb_y; yline[y]; - for(x=0; x>= FRAC_BITS; @@ -2658,12 +2601,10 @@ static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer } } }else{ - for(y=block_w*mb_y; yline[y]; - for(x=0; xcurrent_picture.data[plane_index]; int w= p->width; int h= p->height; - START_TIMER if(s->keyframe || (s->avctx->debug&512)){ if(mb_y==mb_h) @@ -2730,22 +2664,16 @@ static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int pl return; } - for(mb_x=0; mb_x<=mb_w; mb_x++){ - START_TIMER - - add_yblock(s, 0, NULL, buf, dst8, obmc, - block_w*mb_x - block_w/2, - block_w*mb_y - block_w/2, - block_w, block_w, - w, h, - w, ref_stride, obmc_stride, - mb_x - 1, mb_y - 1, - add, 1, plane_index); - - STOP_TIMER("add_yblock") - } - - STOP_TIMER("predict_slice") + for(mb_x=0; mb_x<=mb_w; mb_x++){ + add_yblock(s, 0, NULL, buf, dst8, obmc, + block_w*mb_x - block_w/2, + block_w*mb_y - block_w/2, + block_w, block_w, + w, h, + w, ref_stride, obmc_stride, + mb_x - 1, mb_y - 1, + add, 1, plane_index); + } } static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ @@ -2796,12 +2724,12 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ if(x<0) obmc_v += obmc[index + block_w]; if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; if(x+block_w>w) obmc_v += obmc[index - block_w]; - //FIXME precalc this or simplify it somehow else + //FIXME precalculate this or simplify it somehow else d = -dst[index] + (1<<(FRAC_BITS-1)); dst[index] = d; ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v; - aa += obmc_v * obmc_v; //FIXME precalclate this + aa += obmc_v * obmc_v; //FIXME precalculate this } } } @@ -2833,7 +2761,7 @@ static inline int get_block_bits(SnowContext *s, int x, int y, int w){ 00001XXXX 15-30 8-15 */ //FIXME try accurate rate -//FIXME intra and inter predictors if surrounding blocks arent the same type +//FIXME intra and inter predictors if surrounding blocks are not the same type if(b->type & BLOCK_INTRA){ return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0])) + av_log2(2*FFABS(left->color[1] - b->color[1])) @@ -2912,10 +2840,10 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con if(block_w==16){ /* FIXME rearrange dsputil to fit 32x32 cmp functions */ /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */ - /* FIXME cmps overlap but don't cover the wavelet's whole support, - * so improving the score of one block is not strictly guaranteed to - * improve the score of the whole frame, so iterative motion est - * doesn't always converge. */ + /* FIXME cmps overlap but do not cover the wavelet's whole support. + * So improving the score of one block is not strictly guaranteed + * to improve the score of the whole frame, thus iterative motion + * estimation does not always converge. */ if(s->avctx->me_cmp == FF_CMP_W97) distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); else if(s->avctx->me_cmp == FF_CMP_W53) @@ -2956,7 +2884,9 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ const int ref_stride= s->current_picture.linesize[plane_index]; uint8_t *dst= s->current_picture.data[plane_index]; uint8_t *src= s-> input_picture.data[plane_index]; - static const IDWTELEM zero_dst[4096]; //FIXME + //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst + // const has only been removed from zero_dst to suppress a warning + static IDWTELEM zero_dst[4096]; //FIXME const int b_stride = s->b_width << s->block_max_depth; const int w= p->width; const int h= p->height; @@ -3048,7 +2978,8 @@ static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int } } -/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */ +/* special case for int[2] args we discard afterwards, + * fixes compilation problem with gcc 2.95 */ static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ int p[2] = {p0, p1}; return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); @@ -3140,7 +3071,7 @@ static void iterative_me(SnowContext *s){ memset(s->me_cache, 0, sizeof(s->me_cache)); s->me_cache_generation += 1<<22; - //FIXME precalc + //FIXME precalculate { int x, y; memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4); @@ -3165,8 +3096,7 @@ static void iterative_me(SnowContext *s){ } //skip stuff outside the picture - if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1) - { + if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){ uint8_t *src= s-> input_picture.data[0]; uint8_t *dst= s->current_picture.data[0]; const int stride= s->current_picture.linesize[0]; @@ -3223,7 +3153,7 @@ static void iterative_me(SnowContext *s){ check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd); /* fullpel ME */ - //FIXME avoid subpel interpol / round to nearest integer + //FIXME avoid subpel interpolation / round to nearest integer do{ dia_change=0; for(i=0; iavctx->dia_size, 1); i++){ @@ -3317,13 +3247,11 @@ static void iterative_me(SnowContext *s){ } static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ - const int level= b->level; const int w= b->width; const int h= b->height; const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); int x,y, thres1, thres2; -// START_TIMER if(s->qlog == LOSSLESS_QLOG){ for(y=0; yspatial_decomposition_count){ -// STOP_TIMER("quantize") - } } static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ @@ -3388,7 +3313,6 @@ static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; int x,y; - START_TIMER if(s->qlog == LOSSLESS_QLOG) return; @@ -3404,9 +3328,6 @@ static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand } } } - if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ - STOP_TIMER("dquant") - } } static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ @@ -3416,7 +3337,6 @@ static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; int x,y; - START_TIMER if(s->qlog == LOSSLESS_QLOG) return; @@ -3430,9 +3350,6 @@ static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ } } } - if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ - STOP_TIMER("dquant") - } } static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ @@ -3463,8 +3380,6 @@ static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand const int w= b->width; int x,y; -// START_TIMER - IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning IDWTELEM * prev; @@ -3489,8 +3404,6 @@ static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand } } } - -// STOP_TIMER("correlate") } static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ @@ -3583,10 +3496,6 @@ static void encode_header(SnowContext *s){ put_symbol(&s->c, s->header_state, p->htaps/2-1, 0); for(i= p->htaps/2; i; i--) put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0); - - p->last_diag_mc= p->diag_mc; - p->last_htaps= p->htaps; - memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); } } if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ @@ -3603,12 +3512,26 @@ static void encode_header(SnowContext *s){ put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); - s->last_spatial_decomposition_type= s->spatial_decomposition_type; - s->last_qlog = s->qlog; - s->last_qbias = s->qbias; - s->last_mv_scale = s->mv_scale; - s->last_block_max_depth = s->block_max_depth; - s->last_spatial_decomposition_count= s->spatial_decomposition_count; +} + +static void update_last_header_values(SnowContext *s){ + int plane_index; + + if(!s->keyframe){ + for(plane_index=0; plane_index<2; plane_index++){ + Plane *p= &s->plane[plane_index]; + p->last_diag_mc= p->diag_mc; + p->last_htaps = p->htaps; + memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); + } + } + + s->last_spatial_decomposition_type = s->spatial_decomposition_type; + s->last_qlog = s->qlog; + s->last_qbias = s->qbias; + s->last_mv_scale = s->mv_scale; + s->last_block_max_depth = s->block_max_depth; + s->last_spatial_decomposition_count = s->spatial_decomposition_count; } static void decode_qlogs(SnowContext *s){ @@ -3717,7 +3640,7 @@ static void init_qexp(void){ } } -static int common_init(AVCodecContext *avctx){ +static av_cold int common_init(AVCodecContext *avctx){ SnowContext *s = avctx->priv_data; int width, height; int i, j; @@ -3773,7 +3696,7 @@ static int common_init(AVCodecContext *avctx){ height= s->avctx->height; s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); - s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this doesnt belong here + s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here for(i=0; iplane[plane_index].width = w; s->plane[plane_index].height= h; -//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h); for(level=s->spatial_decomposition_count-1; level>=0; level--){ for(orientation=level ? 1 : 0; orientation<4; orientation++){ SubBand *b= &s->plane[plane_index].band[level][orientation]; @@ -3845,7 +3767,7 @@ static int qscale2qlog(int qscale){ static int ratecontrol_1pass(SnowContext *s, AVFrame *pict) { - /* estimate the frame's complexity as a sum of weighted dwt coefs. + /* Estimate the frame's complexity as a sum of weighted dwt coefficients. * FIXME we know exact mv bits at this point, * but ratecontrol isn't set up to include them. */ uint32_t coef_sum= 0; @@ -3878,7 +3800,7 @@ static int ratecontrol_1pass(SnowContext *s, AVFrame *pict) coef_sum = (uint64_t)coef_sum * coef_sum >> 16; assert(coef_sum < INT_MAX); - if(pict->pict_type == I_TYPE){ + if(pict->pict_type == FF_I_TYPE){ s->m.current_picture.mb_var_sum= coef_sum; s->m.current_picture.mc_mb_var_sum= 0; }else{ @@ -3917,7 +3839,6 @@ static void calculate_visual_weight(SnowContext *s, Plane *p){ } b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); -// av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/); } } } @@ -3939,7 +3860,7 @@ static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDW if(orientation&2) yo= step>>1; - //FIXME bias for non zero ? + //FIXME bias for nonzero ? //FIXME optimize memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP)); for(y=0; yheight; y++){ @@ -3987,7 +3908,7 @@ static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, i SubBand *b= &p->band[level][orientation]; IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer); - assert(src == b->buf); // code doesnt depen on this but its true currently + assert(src == b->buf); // code does not depend on this but it is true currently quantize(s, b, dst, src, b->stride, s->qbias); } @@ -4012,7 +3933,7 @@ static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, i for(x=xs; xwidth; x+= Q2_STEP){ if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++; if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--; - //FIXME try more then just -- + //FIXME try more than just -- } } dequantize_all(s, p, idwt2_buffer, width, height); @@ -4034,26 +3955,26 @@ static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, i } } } - memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly insteda of copy at the end + memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end } #endif /* QUANTIZE2==1 */ -static int encode_init(AVCodecContext *avctx) +static av_cold int encode_init(AVCodecContext *avctx) { SnowContext *s = avctx->priv_data; int plane_index; if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ - av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n" - "use vstrict=-2 / -strict -2 to use it anyway\n"); + av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n" + "Use vstrict=-2 / -strict -2 to use it anyway.\n"); return -1; } if(avctx->prediction_method == DWT_97 && (avctx->flags & CODEC_FLAG_QSCALE) && avctx->global_quality == 0){ - av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n"); + av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n"); return -1; } @@ -4139,6 +4060,8 @@ static int encode_init(AVCodecContext *avctx) return 0; } +#define USE_HALFPEL_PLANE 0 + static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){ int p,x,y; @@ -4189,18 +4112,16 @@ static int frame_start(SnowContext *s){ int h= s->avctx->height; if(s->current_picture.data[0]){ - draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); - draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); - draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); + s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); + s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); + s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); } tmp= s->last_picture[s->max_ref_frames-1]; memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); -#ifdef USE_HALFPEL_PLANE - if(s->current_picture.data[0]) + if(USE_HALFPEL_PLANE && s->current_picture.data[0]) halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture); -#endif s->last_picture[0]= s->current_picture; s->current_picture= tmp; @@ -4272,12 +4193,12 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){ s->qlog= LOSSLESS_QLOG; s->lambda = 0; - }//else keep previous frame's qlog until after motion est + }//else keep previous frame's qlog until after motion estimation frame_start(s); s->m.current_picture_ptr= &s->m.current_picture; - if(pict->pict_type == P_TYPE){ + if(pict->pict_type == FF_P_TYPE){ int block_width = (width +15)>>4; int block_height= (height+15)>>4; int stride= s->current_picture.linesize[0]; @@ -4326,13 +4247,13 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, redo_frame: - if(pict->pict_type == I_TYPE) + if(pict->pict_type == FF_I_TYPE) s->spatial_decomposition_count= 5; else s->spatial_decomposition_count= 5; s->m.pict_type = pict->pict_type; - s->qbias= pict->pict_type == P_TYPE ? 2 : 0; + s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0; common_init_after_header(avctx); @@ -4354,99 +4275,96 @@ redo_frame: int x, y; // int bits= put_bits_count(&s->c.pb); - if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){ - //FIXME optimize - if(pict->data[plane_index]) //FIXME gray hack - for(y=0; yspatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<flags2 & CODEC_FLAG2_MEMC_ONLY)){ + //FIXME optimize + if(pict->data[plane_index]) //FIXME gray hack + for(y=0; yspatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<spatial_idwt_buffer, plane_index, 0); + + if( plane_index==0 + && pict->pict_type == FF_P_TYPE + && !(avctx->flags&CODEC_FLAG_PASS2) + && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ + ff_init_range_encoder(c, buf, buf_size); + ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); + pict->pict_type= FF_I_TYPE; + s->keyframe=1; + s->current_picture.key_frame=1; + goto redo_frame; } - } - predict_plane(s, s->spatial_idwt_buffer, plane_index, 0); - - if( plane_index==0 - && pict->pict_type == P_TYPE - && !(avctx->flags&CODEC_FLAG_PASS2) - && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ - ff_init_range_encoder(c, buf, buf_size); - ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); - pict->pict_type= FF_I_TYPE; - s->keyframe=1; - s->current_picture.key_frame=1; - goto redo_frame; - } - if(s->qlog == LOSSLESS_QLOG){ - for(y=0; yspatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; + if(s->qlog == LOSSLESS_QLOG){ + for(y=0; yspatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; + } } - } - }else{ - for(y=0; yspatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type); - else - ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); + /* if(QUANTIZE2) + dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type); + else*/ + ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); - if(s->pass1_rc && plane_index==0){ - int delta_qlog = ratecontrol_1pass(s, pict); - if (delta_qlog <= INT_MIN) - return -1; - if(delta_qlog){ - //reordering qlog in the bitstream would eliminate this reset - ff_init_range_encoder(c, buf, buf_size); - memcpy(s->header_state, rc_header_bak, sizeof(s->header_state)); - memcpy(s->block_state, rc_block_bak, sizeof(s->block_state)); - encode_header(s); - encode_blocks(s, 0); + if(s->pass1_rc && plane_index==0){ + int delta_qlog = ratecontrol_1pass(s, pict); + if (delta_qlog <= INT_MIN) + return -1; + if(delta_qlog){ + //reordering qlog in the bitstream would eliminate this reset + ff_init_range_encoder(c, buf, buf_size); + memcpy(s->header_state, rc_header_bak, sizeof(s->header_state)); + memcpy(s->block_state, rc_block_bak, sizeof(s->block_state)); + encode_header(s); + encode_blocks(s, 0); + } } - } - - for(level=0; levelspatial_decomposition_count; level++){ - for(orientation=level ? 1 : 0; orientation<4; orientation++){ - SubBand *b= &p->band[level][orientation]; - if(!QUANTIZE2) - quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias); - if(orientation==0) - decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0); - encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation); - assert(b->parent==NULL || b->parent->stride == b->stride*2); - if(orientation==0) - correlate(s, b, b->ibuf, b->stride, 1, 0); + for(level=0; levelspatial_decomposition_count; level++){ + for(orientation=level ? 1 : 0; orientation<4; orientation++){ + SubBand *b= &p->band[level][orientation]; + + if(!QUANTIZE2) + quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias); + if(orientation==0) + decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0); + encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation); + assert(b->parent==NULL || b->parent->stride == b->stride*2); + if(orientation==0) + correlate(s, b, b->ibuf, b->stride, 1, 0); + } } - } -// av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits); - for(level=0; levelspatial_decomposition_count; level++){ - for(orientation=level ? 1 : 0; orientation<4; orientation++){ - SubBand *b= &p->band[level][orientation]; + for(level=0; levelspatial_decomposition_count; level++){ + for(orientation=level ? 1 : 0; orientation<4; orientation++){ + SubBand *b= &p->band[level][orientation]; - dequantize(s, b, b->ibuf, b->stride); + dequantize(s, b, b->ibuf, b->stride); + } } - } - ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); - if(s->qlog == LOSSLESS_QLOG){ - for(y=0; yspatial_idwt_buffer[y*w + x]<<=FRAC_BITS; + ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); + if(s->qlog == LOSSLESS_QLOG){ + for(y=0; yspatial_idwt_buffer[y*w + x]<<=FRAC_BITS; + } } } - } -{START_TIMER - predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); -STOP_TIMER("pred-conv")} - }else{ + predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); + }else{ //ME/MC only - if(pict->pict_type == I_TYPE){ + if(pict->pict_type == FF_I_TYPE){ for(y=0; ycurrent_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= @@ -4457,22 +4375,25 @@ STOP_TIMER("pred-conv")} memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h); predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); } - } + } if(s->avctx->flags&CODEC_FLAG_PSNR){ int64_t error= 0; - if(pict->data[plane_index]) //FIXME gray hack - for(y=0; ycurrent_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; - error += d*d; + if(pict->data[plane_index]) //FIXME gray hack + for(y=0; ycurrent_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; + error += d*d; + } } - } s->avctx->error[plane_index] += error; s->current_picture.error[plane_index] = error; } + } + update_last_header_values(s); + if(s->last_picture[s->max_ref_frames-1].data[0]){ avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); for(i=0; i<9; i++) @@ -4505,7 +4426,7 @@ STOP_TIMER("pred-conv")} return ff_rac_terminate(c); } -static void common_end(SnowContext *s){ +static av_cold void common_end(SnowContext *s){ int plane_index, level, orientation, i; av_freep(&s->spatial_dwt_buffer); @@ -4536,7 +4457,7 @@ static void common_end(SnowContext *s){ } } -static int encode_end(AVCodecContext *avctx) +static av_cold int encode_end(AVCodecContext *avctx) { SnowContext *s = avctx->priv_data; @@ -4546,7 +4467,7 @@ static int encode_end(AVCodecContext *avctx) return 0; } -static int decode_init(AVCodecContext *avctx) +static av_cold int decode_init(AVCodecContext *avctx) { avctx->pix_fmt= PIX_FMT_YUV420P; @@ -4555,7 +4476,7 @@ static int decode_init(AVCodecContext *avctx) return 0; } -static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){ +static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){ SnowContext *s = avctx->priv_data; RangeCoder * const c= &s->c; int bytes_read; @@ -4584,7 +4505,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 if(!s->block) alloc_blocks(s); frame_start(s); - //keyframe flag dupliaction mess FIXME + //keyframe flag duplication mess FIXME if(avctx->debug&FF_DEBUG_PICT_INFO) av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); @@ -4597,111 +4518,103 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 int x, y; int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ -if(s->avctx->debug&2048){ - memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); - predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); + if(s->avctx->debug&2048){ + memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); + predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); - for(y=0; ycurrent_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; - s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; + for(y=0; ycurrent_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; + s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; + } } } -} - -{ START_TIMER - for(level=0; levelspatial_decomposition_count; level++){ - for(orientation=level ? 1 : 0; orientation<4; orientation++){ - SubBand *b= &p->band[level][orientation]; - unpack_coeffs(s, b, b->parent, orientation); - } - } - STOP_TIMER("unpack coeffs"); -} - -{START_TIMER - const int mb_h= s->b_height << s->block_max_depth; - const int block_size = MB_SIZE >> s->block_max_depth; - const int block_w = plane_index ? block_size/2 : block_size; - int mb_y; - dwt_compose_t cs[MAX_DECOMPOSITIONS]; - int yd=0, yq=0; - int y; - int end_y; - - ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); - for(mb_y=0; mb_y<=mb_h; mb_y++){ - - int slice_starty = block_w*mb_y; - int slice_h = block_w*(mb_y+1); - if (!(s->keyframe || s->avctx->debug&512)){ - slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); - slice_h -= (block_w >> 1); - } { - START_TIMER for(level=0; levelspatial_decomposition_count; level++){ for(orientation=level ? 1 : 0; orientation<4; orientation++){ SubBand *b= &p->band[level][orientation]; - int start_y; - int end_y; - int our_mb_start = mb_y; - int our_mb_end = (mb_y + 1); - const int extra= 3; - start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0); - end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra); - if (!(s->keyframe || s->avctx->debug&512)){ - start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level))); - end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level))); - } - start_y = FFMIN(b->height, start_y); - end_y = FFMIN(b->height, end_y); - - if (start_y != end_y){ - if (orientation == 0){ - SubBand * correlate_band = &p->band[0][0]; - int correlate_end_y = FFMIN(b->height, end_y + 1); - int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); - decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); - correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); - dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); - } - else - decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); - } + unpack_coeffs(s, b, b->parent, orientation); } } - STOP_TIMER("decode_subband_slice"); } -{ START_TIMER - for(; yddsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); - } - STOP_TIMER("idwt slice");} + { + const int mb_h= s->b_height << s->block_max_depth; + const int block_size = MB_SIZE >> s->block_max_depth; + const int block_w = plane_index ? block_size/2 : block_size; + int mb_y; + dwt_compose_t cs[MAX_DECOMPOSITIONS]; + int yd=0, yq=0; + int y; + int end_y; + ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); + for(mb_y=0; mb_y<=mb_h; mb_y++){ - if(s->qlog == LOSSLESS_QLOG){ - for(; yqsb, yq); - for(x=0; xkeyframe || s->avctx->debug&512)){ + slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); + slice_h -= (block_w >> 1); + } + + for(level=0; levelspatial_decomposition_count; level++){ + for(orientation=level ? 1 : 0; orientation<4; orientation++){ + SubBand *b= &p->band[level][orientation]; + int start_y; + int end_y; + int our_mb_start = mb_y; + int our_mb_end = (mb_y + 1); + const int extra= 3; + start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0); + end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra); + if (!(s->keyframe || s->avctx->debug&512)){ + start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level))); + end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level))); + } + start_y = FFMIN(b->height, start_y); + end_y = FFMIN(b->height, end_y); + + if (start_y != end_y){ + if (orientation == 0){ + SubBand * correlate_band = &p->band[0][0]; + int correlate_end_y = FFMIN(b->height, end_y + 1); + int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); + decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); + correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); + dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); + } + else + decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); + } } } - } - predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y); + for(; yddsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); + } - y = FFMIN(p->height, slice_starty); - end_y = FFMIN(p->height, slice_h); - while(y < end_y) - slice_buffer_release(&s->sb, y++); - } + if(s->qlog == LOSSLESS_QLOG){ + for(; yqsb, yq); + for(x=0; xsb, s->spatial_idwt_buffer, plane_index, 1, mb_y); - slice_buffer_flush(&s->sb); + y = FFMIN(p->height, slice_starty); + end_y = FFMIN(p->height, slice_h); + while(y < end_y) + slice_buffer_release(&s->sb, y++); + } + + slice_buffer_flush(&s->sb); + } -STOP_TIMER("idwt + predict_slices")} } emms_c(); @@ -4713,10 +4626,10 @@ STOP_TIMER("idwt + predict_slices")} av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); } -if(!(s->avctx->debug&2048)) - *picture= s->current_picture; -else - *picture= s->mconly_picture; + if(!(s->avctx->debug&2048)) + *picture= s->current_picture; + else + *picture= s->mconly_picture; *data_size = sizeof(AVFrame); @@ -4726,7 +4639,7 @@ else return bytes_read; } -static int decode_end(AVCodecContext *avctx) +static av_cold int decode_end(AVCodecContext *avctx) { SnowContext *s = avctx->priv_data; @@ -4763,13 +4676,13 @@ AVCodec snow_encoder = { #endif -#if 0 +#ifdef TEST #undef malloc #undef free #undef printf #undef random -int main(){ +int main(void){ int width=256; int height=256; int buffer[2][width*height]; @@ -4806,9 +4719,7 @@ int main(){ ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); for(i=-256; i<256; i++){ -START_TIMER put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1); -STOP_TIMER("put_symbol") } ff_rac_terminate(&s.c); @@ -4818,62 +4729,59 @@ STOP_TIMER("put_symbol") for(i=-256; i<256; i++){ int j; -START_TIMER j= get_symbol(&s.c, s.header_state, 1); -STOP_TIMER("get_symbol") if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j); } #endif -{ -int level, orientation, x, y; -int64_t errors[8][4]; -int64_t g=0; - - memset(errors, 0, sizeof(errors)); - s.spatial_decomposition_count=3; - s.spatial_decomposition_type=0; - for(level=0; level> (s.spatial_decomposition_count-level); - int h= height >> (s.spatial_decomposition_count-level); - int stride= width << (s.spatial_decomposition_count-level); - DWTELEM *buf= buffer[0]; - int64_t error=0; - - if(orientation&1) buf+=w; - if(orientation>1) buf+=stride>>1; + { + int level, orientation, x, y; + int64_t errors[8][4]; + int64_t g=0; - memset(buffer[0], 0, sizeof(int)*width*height); - buf[w/2 + h/2*stride]= 256*256; - ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); - for(y=0; y> (s.spatial_decomposition_count-level); + int h= height >> (s.spatial_decomposition_count-level); + int stride= width << (s.spatial_decomposition_count-level); + DWTELEM *buf= buffer[0]; + int64_t error=0; + + if(orientation&1) buf+=w; + if(orientation>1) buf+=stride>>1; + + memset(buffer[0], 0, sizeof(int)*width*height); + buf[w/2 + h/2*stride]= 256*256; + ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); + for(y=0; y> (s.spatial_decomposition_count-level); - int h= height >> (s.spatial_decomposition_count-level); + //int h= height >> (s.spatial_decomposition_count-level); int stride= width << (s.spatial_decomposition_count-level); DWTELEM *buf= buffer[0]; int64_t error=0; @@ -4907,9 +4815,9 @@ int64_t g=0; } if(FFABS(height/2-y)<9) printf("\n"); } - } + } -} + } return 0; } -#endif /* 0 */ +#endif /* TEST */