X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fsnow.c;h=dc50941fac5655113451e0f7db2e74a2c72d0b60;hb=226d6947bc6e6592eabbcb7b401bf164be5bfe6e;hp=98d2d4875c9e54bb073ecfea47fce04d316ff3bc;hpb=ed4e20ac7fb83bddf4c9a1fc65f30821dcbc7935;p=ffmpeg diff --git a/libavcodec/snow.c b/libavcodec/snow.c index 98d2d4875c9..dc50941fac5 100644 --- a/libavcodec/snow.c +++ b/libavcodec/snow.c @@ -1,18 +1,20 @@ /* * Copyright (C) 2004 Michael Niedermayer * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -420,7 +422,7 @@ typedef struct Plane{ }Plane; typedef struct SnowContext{ -// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) +// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) AVCodecContext *avctx; RangeCoder c; @@ -437,6 +439,7 @@ typedef struct SnowContext{ int always_reset; int version; int spatial_decomposition_type; + int last_spatial_decomposition_type; int temporal_decomposition_type; int spatial_decomposition_count; int temporal_decomposition_count; @@ -450,15 +453,19 @@ typedef struct SnowContext{ int chroma_v_shift; int spatial_scalability; int qlog; + int last_qlog; int lambda; int lambda2; int pass1_rc; int mv_scale; + int last_mv_scale; int qbias; + int last_qbias; #define QBIAS_SHIFT 3 int b_width; int b_height; int block_max_depth; + int last_block_max_depth; Plane plane[MAX_PLANES]; BlockNode *block; #define ME_CACHE_SIZE 1024 @@ -466,7 +473,7 @@ typedef struct SnowContext{ int me_cache_generation; slice_buffer sb; - MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) + MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) }SnowContext; typedef struct { @@ -588,7 +595,7 @@ static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signe int i; if(v){ - const int a= ABS(v); + const int a= FFABS(v); const int e= av_log2(a); #if 1 const int el= FFMIN(e, 10); @@ -707,7 +714,7 @@ static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ return v; } -static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ +static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ const int mirror_left= !highpass; const int mirror_right= (width&1) ^ highpass; const int w= (width>>1) - 1 + (highpass & width); @@ -730,7 +737,7 @@ static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst } #ifndef lift5 -static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ +static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ const int mirror_left= !highpass; const int mirror_right= (width&1) ^ highpass; const int w= (width>>1) - 1 + (highpass & width); @@ -762,7 +769,7 @@ static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds #endif #ifndef liftS -static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ +static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ const int mirror_left= !highpass; const int mirror_right= (width&1) ^ highpass; const int w= (width>>1) - 1 + (highpass & width); @@ -1664,7 +1671,7 @@ static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELE p= parent[px + py*2*stride]; } if(/*ll|*/l|lt|t|rt|p){ - int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)); + int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); put_rac(&s->c, &b->state[0][context], !!v); }else{ @@ -1680,11 +1687,11 @@ static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELE } } if(v){ - int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)); - int l2= 2*ABS(l) + (l<0); - int t2= 2*ABS(t) + (t<0); + int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); + int l2= 2*FFABS(l) + (l<0); + int t2= 2*FFABS(t) + (t<0); - put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4); + put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); } } @@ -1747,7 +1754,7 @@ static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, i } } if(/*ll|*/l|lt|t|rt|p){ - int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); + int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); v=get_rac(&s->c, &b->state[0][context]); if(v){ @@ -1803,7 +1810,7 @@ static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, i static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ const int w= b->width; int y; - const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); + const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; int new_index = 0; @@ -1847,7 +1854,7 @@ static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, sli return; } -static void reset_contexts(SnowContext *s){ +static void reset_contexts(SnowContext *s){ //FIXME better initial contexts int plane_index, level, orientation; for(plane_index=0; plane_index<3; plane_index++){ @@ -1900,7 +1907,7 @@ static int pix_sum(uint8_t * pix, int line_size, int w) static int pix_norm1(uint8_t * pix, int line_size, int w) { int s, i, j; - uint32_t *sq = squareTbl + 256; + uint32_t *sq = ff_squareTbl + 256; s = 0; for (i = 0; i < w; i++) { @@ -1952,18 +1959,18 @@ static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3 } static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref, - BlockNode *left, BlockNode *top, BlockNode *tr){ + const BlockNode *left, const BlockNode *top, const BlockNode *tr){ if(s->ref_frames == 1){ *mx = mid_pred(left->mx, top->mx, tr->mx); *my = mid_pred(left->my, top->my, tr->my); }else{ const int *scale = scale_mv_ref[ref]; - *mx = mid_pred(left->mx * scale[left->ref] + 128 >>8, - top ->mx * scale[top ->ref] + 128 >>8, - tr ->mx * scale[tr ->ref] + 128 >>8); - *my = mid_pred(left->my * scale[left->ref] + 128 >>8, - top ->my * scale[top ->ref] + 128 >>8, - tr ->my * scale[tr ->ref] + 128 >>8); + *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8, + (top ->mx * scale[top ->ref] + 128) >>8, + (tr ->mx * scale[tr ->ref] + 128) >>8); + *my = mid_pred((left->my * scale[left->ref] + 128) >>8, + (top ->my * scale[top ->ref] + 128) >>8, + (tr ->my * scale[tr ->ref] + 128) >>8); } } @@ -1991,12 +1998,12 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ const int block_w= 1<<(LOG2_MB_SIZE - level); int trx= (x+1)<block[index-1] : &null_block; - BlockNode *top = y ? &s->block[index-w] : &null_block; - BlockNode *right = trxblock[index+1] : &null_block; - BlockNode *bottom= tryblock[index+w] : &null_block; - BlockNode *tl = y && x ? &s->block[index-w-1] : left; - BlockNode *tr = y && trxblock[index-w+(1<block[index-1] : &null_block; + const BlockNode *top = y ? &s->block[index-w] : &null_block; + const BlockNode *right = trxblock[index+1] : &null_block; + const BlockNode *bottom= tryblock[index+w] : &null_block; + const BlockNode *tl = y && x ? &s->block[index-w-1] : left; + const BlockNode *tr = y && trxblock[index-w+(1<color[0]; int pcb= left->color[1]; int pcr= left->color[2]; @@ -2014,8 +2021,8 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ const int shift= 1+qpel; MotionEstContext *c= &s->m.me; int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); - int mx_context= av_log2(2*ABS(left->mx - top->mx)); - int my_context= av_log2(2*ABS(left->my - top->my)); + int mx_context= av_log2(2*FFABS(left->mx - top->mx)); + int my_context= av_log2(2*FFABS(left->my - top->my)); int s_context= 2*left->level + 2*top->level + tl->level + tr->level; int ref, best_ref, ref_score, ref_mx, ref_my; @@ -2044,10 +2051,10 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ s->m.mb_stride=2; s->m.mb_x= s->m.mb_y= 0; - s->m.me.skip= 0; + c->skip= 0; - assert(s->m.me. stride == stride); - assert(s->m.me.uvstride == uvstride); + assert(c-> stride == stride); + assert(c->uvstride == uvstride); c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); @@ -2091,7 +2098,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ assert(ref_my >= c->ymin); assert(ref_my <= c->ymax); - ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); + ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); ref_score+= 2*av_log2(2*ref)*c->penalty_factor; if(s->ref_mvs[ref]){ @@ -2204,7 +2211,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ } } -static always_inline int same_block(BlockNode *a, BlockNode *b){ +static av_always_inline int same_block(BlockNode *a, BlockNode *b){ if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); }else{ @@ -2218,17 +2225,17 @@ static void encode_q_branch2(SnowContext *s, int level, int x, int y){ const int index= (x + y*w) << rem_depth; int trx= (x+1)<block[index]; - BlockNode *left = x ? &s->block[index-1] : &null_block; - BlockNode *top = y ? &s->block[index-w] : &null_block; - BlockNode *tl = y && x ? &s->block[index-w-1] : left; - BlockNode *tr = y && trxblock[index-w+(1<block[index-1] : &null_block; + const BlockNode *top = y ? &s->block[index-w] : &null_block; + const BlockNode *tl = y && x ? &s->block[index-w-1] : left; + const BlockNode *tr = y && trxblock[index-w+(1<color[0]; int pcb= left->color[1]; int pcr= left->color[2]; int pmx, pmy; int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); - int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 16*!!b->ref; - int my_context= av_log2(2*ABS(left->my - top->my)) + 16*!!b->ref; + int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref; + int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref; int s_context= 2*left->level + 2*top->level + tl->level + tr->level; if(s->keyframe){ @@ -2271,10 +2278,10 @@ static void decode_q_branch(SnowContext *s, int level, int x, int y){ const int rem_depth= s->block_max_depth - level; const int index= (x + y*w) << rem_depth; int trx= (x+1)<block[index-1] : &null_block; - BlockNode *top = y ? &s->block[index-w] : &null_block; - BlockNode *tl = y && x ? &s->block[index-w-1] : left; - BlockNode *tr = y && trxblock[index-w+(1<block[index-1] : &null_block; + const BlockNode *top = y ? &s->block[index-w] : &null_block; + const BlockNode *tl = y && x ? &s->block[index-w-1] : left; + const BlockNode *tr = y && trxblock[index-w+(1<level + 2*top->level + tl->level + tr->level; if(s->keyframe){ @@ -2283,16 +2290,14 @@ static void decode_q_branch(SnowContext *s, int level, int x, int y){ } if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ - int type; + int type, mx, my; int l = left->color[0]; int cb= left->color[1]; int cr= left->color[2]; - int mx= mid_pred(left->mx, top->mx, tr->mx); - int my= mid_pred(left->my, top->my, tr->my); int ref = 0; int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); - int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx)); - int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my)); + int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); + int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my)); type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; @@ -2317,12 +2322,12 @@ static void decode_q_branch(SnowContext *s, int level, int x, int y){ } } -static void encode_blocks(SnowContext *s){ +static void encode_blocks(SnowContext *s, int search){ int x, y; int w= s->b_width; int h= s->b_height; - if(s->avctx->me_method == ME_ITER && !s->keyframe) + if(s->avctx->me_method == ME_ITER && !s->keyframe && search) iterative_me(s); for(y=0; yavctx->me_method == ME_ITER) + if(s->avctx->me_method == ME_ITER || !search) encode_q_branch2(s, 0, x, y); else encode_q_branch (s, 0, x, y); @@ -2516,16 +2521,16 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, i } } -void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, +void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ int y, x; DWTELEM * dst; for(y=0; y>1); - uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); - uint8_t *obmc4= obmc3+ (obmc_stride>>1); + const uint8_t *obmc1= obmc + y*obmc_stride; + const uint8_t *obmc2= obmc1+ (obmc_stride>>1); + const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + const uint8_t *obmc4= obmc3+ (obmc_stride>>1); dst = slice_buffer_get_line(sb, src_y + y); for(x=0; xb_width << s->block_max_depth; const int b_height= s->b_height << s->block_max_depth; const int b_stride= b_width; @@ -2584,136 +2588,7 @@ static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 obmc -= src_x; b_w += src_x; - src_x=0; - }else if(src_x + b_w > w){ - b_w = w - src_x; - } - if(src_y<0){ - obmc -= src_y*obmc_stride; - b_h += src_y; - src_y=0; - }else if(src_y + b_h> h){ - b_h = h - src_y; - } - - if(b_w<=0 || b_h<=0) return; - -assert(src_stride > 2*MB_SIZE + 5); -// old_dst += src_x + src_y*dst_stride; - dst8+= src_x + src_y*src_stride; -// src += src_x + src_y*src_stride; - - ptmp= tmp + 3*tmp_step; - block[0]= ptmp; - ptmp+=tmp_step; - pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); - - if(same_block(lt, rt)){ - block[1]= block[0]; - }else{ - block[1]= ptmp; - ptmp+=tmp_step; - pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); - } - - if(same_block(lt, lb)){ - block[2]= block[0]; - }else if(same_block(rt, lb)){ - block[2]= block[1]; - }else{ - block[2]= ptmp; - ptmp+=tmp_step; - pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); - } - - if(same_block(lt, rb) ){ - block[3]= block[0]; - }else if(same_block(rt, rb)){ - block[3]= block[1]; - }else if(same_block(lb, rb)){ - block[3]= block[2]; - }else{ - block[3]= ptmp; - pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); - } -#if 0 - for(y=0; y>1); - for(x=0; x>1); - for(x=0; x>1); - uint8_t *obmc4= obmc3+ (obmc_stride>>1); - for(x=0; xdsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); - STOP_TIMER("Inner add y block") -} -#endif -} - -//FIXME name clenup (b_w, block_w, b_width stuff) -static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ - const int b_width = s->b_width << s->block_max_depth; - const int b_height= s->b_height << s->block_max_depth; - const int b_stride= b_width; - BlockNode *lt= &s->block[b_x + b_y*b_stride]; - BlockNode *rt= lt+1; - BlockNode *lb= lt+b_stride; - BlockNode *rb= lb+1; - uint8_t *block[4]; - int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; - uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align - uint8_t *ptmp; - int x,y; - - if(b_x<0){ - lt= rt; - lb= rb; - }else if(b_x + 1 >= b_width){ - rt= lt; - rb= lb; - } - if(b_y<0){ - lt= lb; - rt= rb; - }else if(b_y + 1 >= b_height){ - lb= lt; - rb= rt; - } - - if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 - obmc -= src_x; - b_w += src_x; - if(!offset_dst) + if(!sliced && !offset_dst) dst -= src_x; src_x=0; }else if(src_x + b_w > w){ @@ -2722,7 +2597,7 @@ static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8 if(src_y<0){ obmc -= src_y*obmc_stride; b_h += src_y; - if(!offset_dst) + if(!sliced && !offset_dst) dst -= src_y*dst_stride; src_y=0; }else if(src_y + b_h> h){ @@ -2732,7 +2607,7 @@ static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8 if(b_w<=0 || b_h<=0) return; assert(src_stride > 2*MB_SIZE + 5); - if(offset_dst) + if(!sliced && offset_dst) dst += src_x + src_y*dst_stride; dst8+= src_x + src_y*src_stride; // src += src_x + src_y*src_stride; @@ -2804,12 +2679,18 @@ assert(src_stride > 2*MB_SIZE + 5); } } #else + if(sliced){ + START_TIMER + + s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); + STOP_TIMER("inner_add_yblock") + }else for(y=0; y>1); - uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); - uint8_t *obmc4= obmc3+ (obmc_stride>>1); + const uint8_t *obmc1= obmc + y*obmc_stride; + const uint8_t *obmc2= obmc1+ (obmc_stride>>1); + const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + const uint8_t *obmc4= obmc3+ (obmc_stride>>1); for(x=0; x 2*MB_SIZE + 5); #endif } -static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){ +static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){ Plane *p= &s->plane[plane_index]; const int mb_w= s->b_width << s->block_max_depth; const int mb_h= s->b_height << s->block_max_depth; @@ -2886,14 +2767,14 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * for(mb_x=0; mb_x<=mb_w; mb_x++){ START_TIMER - add_yblock_buffered(s, sb, old_buffer, dst8, obmc, + add_yblock(s, 1, sb, old_buffer, dst8, obmc, block_w*mb_x - block_w/2, block_w*mb_y - block_w/2, block_w, block_w, w, h, w, ref_stride, obmc_stride, mb_x - 1, mb_y - 1, - add, plane_index); + add, 0, plane_index); STOP_TIMER("add_yblock") } @@ -2901,7 +2782,7 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * STOP_TIMER("predict_slice") } -static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){ +static av_always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){ Plane *p= &s->plane[plane_index]; const int mb_w= s->b_width << s->block_max_depth; const int mb_h= s->b_height << s->block_max_depth; @@ -2943,7 +2824,7 @@ static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_ for(mb_x=0; mb_x<=mb_w; mb_x++){ START_TIMER - add_yblock(s, buf, dst8, obmc, + add_yblock(s, 0, NULL, buf, dst8, obmc, block_w*mb_x - block_w/2, block_w*mb_y - block_w/2, block_w, block_w, @@ -2958,7 +2839,7 @@ static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_ STOP_TIMER("predict_slice") } -static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ +static av_always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ const int mb_h= s->b_height << s->block_max_depth; int mb_y; for(mb_y=0; mb_y<=mb_h; mb_y++) @@ -2994,7 +2875,7 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ int x= block_w*mb_x2 + block_w/2; int y= block_w*mb_y2 + block_w/2; - add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, + add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); for(y2= FFMAX(y, 0); y2b_width << s->block_max_depth; const int b_height = s->b_height<< s->block_max_depth; int index= x + y*b_stride; - BlockNode *b = &s->block[index]; - BlockNode *left = x ? &s->block[index-1] : &null_block; - BlockNode *top = y ? &s->block[index-b_stride] : &null_block; - BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; - BlockNode *tr = y && x+wblock[index-b_stride+w] : tl; + const BlockNode *b = &s->block[index]; + const BlockNode *left = x ? &s->block[index-1] : &null_block; + const BlockNode *top = y ? &s->block[index-b_stride] : &null_block; + const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; + const BlockNode *tr = y && x+wblock[index-b_stride+w] : tl; int dmx, dmy; -// int mx_context= av_log2(2*ABS(left->mx - top->mx)); -// int my_context= av_log2(2*ABS(left->my - top->my)); +// int mx_context= av_log2(2*FFABS(left->mx - top->mx)); +// int my_context= av_log2(2*FFABS(left->my - top->my)); if(x<0 || x>=b_stride || y>=b_height) return 0; @@ -3045,15 +2926,15 @@ static inline int get_block_bits(SnowContext *s, int x, int y, int w){ //FIXME try accurate rate //FIXME intra and inter predictors if surrounding blocks arent the same type if(b->type & BLOCK_INTRA){ - return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0])) - + av_log2(2*ABS(left->color[1] - b->color[1])) - + av_log2(2*ABS(left->color[2] - b->color[2]))); + return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0])) + + av_log2(2*FFABS(left->color[1] - b->color[1])) + + av_log2(2*FFABS(left->color[2] - b->color[2]))); }else{ pred_mv(s, &dmx, &dmy, b->ref, left, top, tr); dmx-= b->mx; dmy-= b->my; - return 2*(1 + av_log2(2*ABS(dmx)) //FIXME kill the 2* can be merged in lambda - + av_log2(2*ABS(dmy)) + return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda + + av_log2(2*FFABS(dmy)) + av_log2(2*b->ref)); } } @@ -3062,7 +2943,6 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con Plane *p= &s->plane[plane_index]; const int block_size = MB_SIZE >> s->block_max_depth; const int block_w = plane_index ? block_size/2 : block_size; - const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; const int obmc_stride= plane_index ? block_size : 2*block_size; const int ref_stride= s->current_picture.linesize[plane_index]; uint8_t *dst= s->current_picture.data[plane_index]; @@ -3165,7 +3045,6 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ uint8_t *src= s-> input_picture.data[plane_index]; static const DWTELEM zero_dst[4096]; //FIXME const int b_stride = s->b_width << s->block_max_depth; - const int b_height = s->b_height<< s->block_max_depth; const int w= p->width; const int h= p->height; int distortion= 0; @@ -3178,7 +3057,7 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ int x= block_w*mb_x2 + block_w/2; int y= block_w*mb_y2 + block_w/2; - add_yblock(s, zero_dst, dst, obmc, + add_yblock(s, 0, NULL, zero_dst, dst, obmc, x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); //FIXME find a cleaner/simpler way to skip the outside stuff @@ -3218,7 +3097,7 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ return distortion + rate*penalty_factor; } -static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ +static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ const int b_stride= s->b_width << s->block_max_depth; BlockNode *block= &s->block[mb_x + mb_y * b_stride]; BlockNode backup= *block; @@ -3257,12 +3136,12 @@ static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3 } /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */ -static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ +static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ int p[2] = {p0, p1}; return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); } -static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ +static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ const int b_stride= s->b_width << s->block_max_depth; BlockNode *block= &s->block[mb_x + mb_y * b_stride]; BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; @@ -3528,7 +3407,7 @@ static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int b const int level= b->level; const int w= b->width; const int h= b->height; - const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); + const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); int x,y, thres1, thres2; // START_TIMER @@ -3587,7 +3466,7 @@ static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int b static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){ const int w= b->width; - const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); + const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; int x,y; @@ -3615,7 +3494,7 @@ static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){ const int w= b->width; const int h= b->height; - const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); + const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; int x,y; @@ -3668,7 +3547,7 @@ static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand // START_TIMER - DWTELEM * line; + DWTELEM * line=0; // silence silly "could be used without having been initialized" warning DWTELEM * prev; if (start_y != 0) @@ -3727,8 +3606,14 @@ static void encode_header(SnowContext *s){ memset(kstate, MID_STATE, sizeof(kstate)); put_rac(&s->c, kstate, s->keyframe); - if(s->keyframe || s->always_reset) + if(s->keyframe || s->always_reset){ reset_contexts(s); + s->last_spatial_decomposition_type= + s->last_qlog= + s->last_qbias= + s->last_mv_scale= + s->last_block_max_depth= 0; + } if(s->keyframe){ put_symbol(&s->c, s->header_state, s->version, 0); put_rac(&s->c, s->header_state, s->always_reset); @@ -3751,11 +3636,17 @@ static void encode_header(SnowContext *s){ } } } - put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0); - put_symbol(&s->c, s->header_state, s->qlog, 1); - put_symbol(&s->c, s->header_state, s->mv_scale, 0); - put_symbol(&s->c, s->header_state, s->qbias, 1); - put_symbol(&s->c, s->header_state, s->block_max_depth, 0); + put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); + put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); + put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); + put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); + put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); + + s->last_spatial_decomposition_type= s->spatial_decomposition_type; + s->last_qlog = s->qlog; + s->last_qbias = s->qbias; + s->last_mv_scale = s->mv_scale; + s->last_block_max_depth = s->block_max_depth; } static int decode_header(SnowContext *s){ @@ -3765,8 +3656,14 @@ static int decode_header(SnowContext *s){ memset(kstate, MID_STATE, sizeof(kstate)); s->keyframe= get_rac(&s->c, kstate); - if(s->keyframe || s->always_reset) + if(s->keyframe || s->always_reset){ reset_contexts(s); + s->spatial_decomposition_type= + s->qlog= + s->qbias= + s->mv_scale= + s->block_max_depth= 0; + } if(s->keyframe){ s->version= get_symbol(&s->c, s->header_state, 0); if(s->version>0){ @@ -3797,16 +3694,16 @@ static int decode_header(SnowContext *s){ } } - s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0); + s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); if(s->spatial_decomposition_type > 2){ av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); return -1; } - s->qlog= get_symbol(&s->c, s->header_state, 1); - s->mv_scale= get_symbol(&s->c, s->header_state, 0); - s->qbias= get_symbol(&s->c, s->header_state, 1); - s->block_max_depth= get_symbol(&s->c, s->header_state, 0); + s->qlog += get_symbol(&s->c, s->header_state, 1); + s->mv_scale += get_symbol(&s->c, s->header_state, 1); + s->qbias += get_symbol(&s->c, s->header_state, 1); + s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); if(s->block_max_depth > 1 || s->block_max_depth < 0){ av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); s->block_max_depth= 0; @@ -3957,13 +3854,13 @@ static int qscale2qlog(int qscale){ + 61*QROOT/8; //<64 >60 } -static void ratecontrol_1pass(SnowContext *s, AVFrame *pict) +static int ratecontrol_1pass(SnowContext *s, AVFrame *pict) { /* estimate the frame's complexity as a sum of weighted dwt coefs. * FIXME we know exact mv bits at this point, * but ratecontrol isn't set up to include them. */ uint32_t coef_sum= 0; - int level, orientation; + int level, orientation, delta_qlog; for(level=0; levelspatial_decomposition_count; level++){ for(orientation=level ? 1 : 0; orientation<4; orientation++){ @@ -3972,7 +3869,7 @@ static void ratecontrol_1pass(SnowContext *s, AVFrame *pict) const int w= b->width; const int h= b->height; const int stride= b->stride; - const int qlog= clip(2*QROOT + b->qlog, 0, QROOT*16); + const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16); const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); const int qdiv= (1<<16)/qmul; int x, y; @@ -3999,8 +3896,12 @@ static void ratecontrol_1pass(SnowContext *s, AVFrame *pict) } pict->quality= ff_rate_estimate_qscale(&s->m, 1); + if (pict->quality < 0) + return INT_MIN; s->lambda= pict->quality * 3/2; - s->qlog= qscale2qlog(pict->quality); + delta_qlog= qscale2qlog(pict->quality) - s->qlog; + s->qlog+= delta_qlog; + return delta_qlog; } static void calculate_vissual_weight(SnowContext *s, Plane *p){ @@ -4090,7 +3991,7 @@ static int encode_init(AVCodecContext *avctx) // case PIX_FMT_YUV410P: s->colorspace_type= 0; break; -/* case PIX_FMT_RGBA32: +/* case PIX_FMT_RGB32: s->colorspace= 1; break;*/ default: @@ -4162,6 +4063,8 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, const int width= s->avctx->width; const int height= s->avctx->height; int level, orientation, plane_index, i, y; + uint8_t rc_header_bak[sizeof(s->header_state)]; + uint8_t rc_block_bak[sizeof(s->block_state)]; ff_init_range_encoder(c, buf, buf_size); ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); @@ -4180,8 +4083,11 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, s->m.pict_type = pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; s->keyframe= pict->pict_type==FF_I_TYPE; - if(!(avctx->flags&CODEC_FLAG_QSCALE)) + if(!(avctx->flags&CODEC_FLAG_QSCALE)) { pict->quality= ff_rate_estimate_qscale(&s->m, 0); + if (pict->quality < 0) + return -1; + } }else{ s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; s->m.pict_type= @@ -4244,6 +4150,11 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, s->dsp= s->m.dsp; } + if(s->pass1_rc){ + memcpy(rc_header_bak, s->header_state, sizeof(s->header_state)); + memcpy(rc_block_bak, s->block_state, sizeof(s->block_state)); + } + redo_frame: s->m.pict_type = pict->pict_type; @@ -4251,7 +4162,7 @@ redo_frame: encode_header(s); s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start); - encode_blocks(s); + encode_blocks(s, 1); s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits; for(plane_index=0; plane_index<3; plane_index++){ @@ -4280,7 +4191,6 @@ redo_frame: pict->pict_type= FF_I_TYPE; s->keyframe=1; s->current_picture.key_frame=1; - reset_contexts(s); goto redo_frame; } @@ -4294,8 +4204,19 @@ redo_frame: ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); - if(s->pass1_rc && plane_index==0) - ratecontrol_1pass(s, pict); + if(s->pass1_rc && plane_index==0){ + int delta_qlog = ratecontrol_1pass(s, pict); + if (delta_qlog <= INT_MIN) + return -1; + if(delta_qlog){ + //reordering qlog in the bitstream would eliminate this reset + ff_init_range_encoder(c, buf, buf_size); + memcpy(s->header_state, rc_header_bak, sizeof(s->header_state)); + memcpy(s->block_state, rc_block_bak, sizeof(s->block_state)); + encode_header(s); + encode_blocks(s, 0); + } + } for(level=0; levelspatial_decomposition_count; level++){ for(orientation=level ? 1 : 0; orientation<4; orientation++){ @@ -4373,10 +4294,15 @@ STOP_TIMER("pred-conv")} s->m.current_picture.quality = pict->quality; s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start); if(s->pass1_rc) - ff_rate_estimate_qscale(&s->m, 0); + if (ff_rate_estimate_qscale(&s->m, 0) < 0) + return -1; if(avctx->flags&CODEC_FLAG_PASS1) ff_write_pass1_stats(&s->m); s->m.last_pict_type = s->m.pict_type; + avctx->frame_bits = s->m.frame_bits; + avctx->mv_bits = s->m.mv_bits; + avctx->misc_bits = s->m.misc_bits; + avctx->p_tex_bits = s->m.p_tex_bits; emms_c(); @@ -4661,7 +4587,7 @@ int main(){ ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); for(i=0; i20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]); + if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]); #if 0 printf("testing AC coder\n"); @@ -4671,7 +4597,7 @@ int main(){ for(i=-256; i<256; i++){ START_TIMER - put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1); + put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1); STOP_TIMER("put_symbol") } ff_rac_terminate(&s.c); @@ -4685,7 +4611,7 @@ STOP_TIMER("put_symbol") START_TIMER j= get_symbol(&s.c, s.header_state, 1); STOP_TIMER("get_symbol") - if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j); + if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j); } #endif { @@ -4714,9 +4640,9 @@ int64_t g=0; for(x=0; x