]> git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
Merge remote-tracking branch 'qatar/master'
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "dsputil.h"
26 #include "dwt.h"
27 #include "snow.h"
28 #include "snowdata.h"
29
30 #include "rangecoder.h"
31 #include "mathops.h"
32 #include "h263.h"
33
34
35 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
36                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
37     int y, x;
38     IDWTELEM * dst;
39     for(y=0; y<b_h; y++){
40         //FIXME ugly misuse of obmc_stride
41         const uint8_t *obmc1= obmc + y*obmc_stride;
42         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
43         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
44         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
45         dst = slice_buffer_get_line(sb, src_y + y);
46         for(x=0; x<b_w; x++){
47             int v=   obmc1[x] * block[3][x + y*src_stride]
48                     +obmc2[x] * block[2][x + y*src_stride]
49                     +obmc3[x] * block[1][x + y*src_stride]
50                     +obmc4[x] * block[0][x + y*src_stride];
51
52             v <<= 8 - LOG2_OBMC_MAX;
53             if(FRAC_BITS != 8){
54                 v >>= 8 - FRAC_BITS;
55             }
56             if(add){
57                 v += dst[x + src_x];
58                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
59                 if(v&(~255)) v= ~(v>>31);
60                 dst8[x + y*src_stride] = v;
61             }else{
62                 dst[x + src_x] -= v;
63             }
64         }
65     }
66 }
67
68 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
69     int plane_index, level, orientation;
70
71     for(plane_index=0; plane_index<3; plane_index++){
72         for(level=0; level<MAX_DECOMPOSITIONS; level++){
73             for(orientation=level ? 1:0; orientation<4; orientation++){
74                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
75             }
76         }
77     }
78     memset(s->header_state, MID_STATE, sizeof(s->header_state));
79     memset(s->block_state, MID_STATE, sizeof(s->block_state));
80 }
81
82 int ff_snow_alloc_blocks(SnowContext *s){
83     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
84     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
85
86     s->b_width = w;
87     s->b_height= h;
88
89     av_free(s->block);
90     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
91     return 0;
92 }
93
94 static void init_qexp(void){
95     int i;
96     double v=128;
97
98     for(i=0; i<QROOT; i++){
99         ff_qexp[i]= lrintf(v);
100         v *= pow(2, 1.0 / QROOT);
101     }
102 }
103 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
104     static const uint8_t weight[64]={
105     8,7,6,5,4,3,2,1,
106     7,7,0,0,0,0,0,1,
107     6,0,6,0,0,0,2,0,
108     5,0,0,5,0,3,0,0,
109     4,0,0,0,4,0,0,0,
110     3,0,0,5,0,3,0,0,
111     2,0,6,0,0,0,2,0,
112     1,7,0,0,0,0,0,1,
113     };
114
115     static const uint8_t brane[256]={
116     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
117     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
118     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
119     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
120     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
121     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
122     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
123     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
124     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
125     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
126     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
127     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
128     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
129     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
130     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
131     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
132     };
133
134     static const uint8_t needs[16]={
135     0,1,0,0,
136     2,4,2,0,
137     0,1,0,0,
138     15
139     };
140
141     int x, y, b, r, l;
142     int16_t tmpIt   [64*(32+HTAPS_MAX)];
143     uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
144     int16_t *tmpI= tmpIt;
145     uint8_t *tmp2= tmp2t[0];
146     const uint8_t *hpel[11];
147     av_assert2(dx<16 && dy<16);
148     r= brane[dx + 16*dy]&15;
149     l= brane[dx + 16*dy]>>4;
150
151     b= needs[l] | needs[r];
152     if(p && !p->diag_mc)
153         b= 15;
154
155     if(b&5){
156         for(y=0; y < b_h+HTAPS_MAX-1; y++){
157             for(x=0; x < b_w; x++){
158                 int a_1=src[x + HTAPS_MAX/2-4];
159                 int a0= src[x + HTAPS_MAX/2-3];
160                 int a1= src[x + HTAPS_MAX/2-2];
161                 int a2= src[x + HTAPS_MAX/2-1];
162                 int a3= src[x + HTAPS_MAX/2+0];
163                 int a4= src[x + HTAPS_MAX/2+1];
164                 int a5= src[x + HTAPS_MAX/2+2];
165                 int a6= src[x + HTAPS_MAX/2+3];
166                 int am=0;
167                 if(!p || p->fast_mc){
168                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
169                     tmpI[x]= am;
170                     am= (am+16)>>5;
171                 }else{
172                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
173                     tmpI[x]= am;
174                     am= (am+32)>>6;
175                 }
176
177                 if(am&(~255)) am= ~(am>>31);
178                 tmp2[x]= am;
179             }
180             tmpI+= 64;
181             tmp2+= stride;
182             src += stride;
183         }
184         src -= stride*y;
185     }
186     src += HTAPS_MAX/2 - 1;
187     tmp2= tmp2t[1];
188
189     if(b&2){
190         for(y=0; y < b_h; y++){
191             for(x=0; x < b_w+1; x++){
192                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
193                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
194                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
195                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
196                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
197                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
198                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
199                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
200                 int am=0;
201                 if(!p || p->fast_mc)
202                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
203                 else
204                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
205
206                 if(am&(~255)) am= ~(am>>31);
207                 tmp2[x]= am;
208             }
209             src += stride;
210             tmp2+= stride;
211         }
212         src -= stride*y;
213     }
214     src += stride*(HTAPS_MAX/2 - 1);
215     tmp2= tmp2t[2];
216     tmpI= tmpIt;
217     if(b&4){
218         for(y=0; y < b_h; y++){
219             for(x=0; x < b_w; x++){
220                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
221                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
222                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
223                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
224                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
225                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
226                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
227                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
228                 int am=0;
229                 if(!p || p->fast_mc)
230                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
231                 else
232                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
233                 if(am&(~255)) am= ~(am>>31);
234                 tmp2[x]= am;
235             }
236             tmpI+= 64;
237             tmp2+= stride;
238         }
239     }
240
241     hpel[ 0]= src;
242     hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
243     hpel[ 2]= src + 1;
244
245     hpel[ 4]= tmp2t[1];
246     hpel[ 5]= tmp2t[2];
247     hpel[ 6]= tmp2t[1] + 1;
248
249     hpel[ 8]= src + stride;
250     hpel[ 9]= hpel[1] + stride;
251     hpel[10]= hpel[8] + 1;
252
253     if(b==15){
254         const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
255         const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
256         const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
257         const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
258         dx&=7;
259         dy&=7;
260         for(y=0; y < b_h; y++){
261             for(x=0; x < b_w; x++){
262                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
263                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
264             }
265             src1+=stride;
266             src2+=stride;
267             src3+=stride;
268             src4+=stride;
269             dst +=stride;
270         }
271     }else{
272         const uint8_t *src1= hpel[l];
273         const uint8_t *src2= hpel[r];
274         int a= weight[((dx&7) + (8*(dy&7)))];
275         int b= 8-a;
276         for(y=0; y < b_h; y++){
277             for(x=0; x < b_w; x++){
278                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
279             }
280             src1+=stride;
281             src2+=stride;
282             dst +=stride;
283         }
284     }
285 }
286
287 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
288     if(block->type & BLOCK_INTRA){
289         int x, y;
290         const unsigned color  = block->color[plane_index];
291         const unsigned color4 = color*0x01010101;
292         if(b_w==32){
293             for(y=0; y < b_h; y++){
294                 *(uint32_t*)&dst[0 + y*stride]= color4;
295                 *(uint32_t*)&dst[4 + y*stride]= color4;
296                 *(uint32_t*)&dst[8 + y*stride]= color4;
297                 *(uint32_t*)&dst[12+ y*stride]= color4;
298                 *(uint32_t*)&dst[16+ y*stride]= color4;
299                 *(uint32_t*)&dst[20+ y*stride]= color4;
300                 *(uint32_t*)&dst[24+ y*stride]= color4;
301                 *(uint32_t*)&dst[28+ y*stride]= color4;
302             }
303         }else if(b_w==16){
304             for(y=0; y < b_h; y++){
305                 *(uint32_t*)&dst[0 + y*stride]= color4;
306                 *(uint32_t*)&dst[4 + y*stride]= color4;
307                 *(uint32_t*)&dst[8 + y*stride]= color4;
308                 *(uint32_t*)&dst[12+ y*stride]= color4;
309             }
310         }else if(b_w==8){
311             for(y=0; y < b_h; y++){
312                 *(uint32_t*)&dst[0 + y*stride]= color4;
313                 *(uint32_t*)&dst[4 + y*stride]= color4;
314             }
315         }else if(b_w==4){
316             for(y=0; y < b_h; y++){
317                 *(uint32_t*)&dst[0 + y*stride]= color4;
318             }
319         }else{
320             for(y=0; y < b_h; y++){
321                 for(x=0; x < b_w; x++){
322                     dst[x + y*stride]= color;
323                 }
324             }
325         }
326     }else{
327         uint8_t *src= s->last_picture[block->ref].data[plane_index];
328         const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
329         int mx= block->mx*scale;
330         int my= block->my*scale;
331         const int dx= mx&15;
332         const int dy= my&15;
333         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
334         sx += (mx>>4) - (HTAPS_MAX/2-1);
335         sy += (my>>4) - (HTAPS_MAX/2-1);
336         src += sx + sy*stride;
337         if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
338            || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
339             s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
340             src= tmp + MB_SIZE;
341         }
342
343         av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
344
345 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
346 //        assert(!(b_w&(b_w-1)));
347         av_assert2(b_w>1 && b_h>1);
348         av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
349         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
350             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
351         else if(b_w==32){
352             int y;
353             for(y=0; y<b_h; y+=16){
354                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
355                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
356             }
357         }else if(b_w==b_h)
358             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
359         else if(b_w==2*b_h){
360             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
361             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
362         }else{
363             av_assert2(2*b_w==b_h);
364             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
365             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
366         }
367     }
368 }
369
370 #define mca(dx,dy,b_w)\
371 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
372     av_assert2(h==b_w);\
373     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
374 }
375
376 mca( 0, 0,16)
377 mca( 8, 0,16)
378 mca( 0, 8,16)
379 mca( 8, 8,16)
380 mca( 0, 0,8)
381 mca( 8, 0,8)
382 mca( 0, 8,8)
383 mca( 8, 8,8)
384
385 av_cold int ff_snow_common_init(AVCodecContext *avctx){
386     SnowContext *s = avctx->priv_data;
387     int width, height;
388     int i, j;
389
390     s->avctx= avctx;
391     s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
392
393     ff_dsputil_init(&s->dsp, avctx);
394     ff_dwt_init(&s->dwt);
395
396 #define mcf(dx,dy)\
397     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
398     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
399         s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
400     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
401     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
402         s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
403
404     mcf( 0, 0)
405     mcf( 4, 0)
406     mcf( 8, 0)
407     mcf(12, 0)
408     mcf( 0, 4)
409     mcf( 4, 4)
410     mcf( 8, 4)
411     mcf(12, 4)
412     mcf( 0, 8)
413     mcf( 4, 8)
414     mcf( 8, 8)
415     mcf(12, 8)
416     mcf( 0,12)
417     mcf( 4,12)
418     mcf( 8,12)
419     mcf(12,12)
420
421 #define mcfh(dx,dy)\
422     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
423     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
424         mc_block_hpel ## dx ## dy ## 16;\
425     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
426     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
427         mc_block_hpel ## dx ## dy ## 8;
428
429     mcfh(0, 0)
430     mcfh(8, 0)
431     mcfh(0, 8)
432     mcfh(8, 8)
433
434     init_qexp();
435
436 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
437
438     width= s->avctx->width;
439     height= s->avctx->height;
440
441     s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
442     s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
443     s->temp_dwt_buffer = av_mallocz(width * sizeof(DWTELEM));
444     s->temp_idwt_buffer = av_mallocz(width * sizeof(IDWTELEM));
445
446     for(i=0; i<MAX_REF_FRAMES; i++)
447         for(j=0; j<MAX_REF_FRAMES; j++)
448             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
449
450     s->avctx->get_buffer(s->avctx, &s->mconly_picture);
451     s->scratchbuf = av_mallocz(s->mconly_picture.linesize[0]*7*MB_SIZE);
452
453     return 0;
454 }
455
456 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
457     SnowContext *s = avctx->priv_data;
458     int plane_index, level, orientation;
459
460     for(plane_index=0; plane_index<3; plane_index++){
461         int w= s->avctx->width;
462         int h= s->avctx->height;
463
464         if(plane_index){
465             w>>= s->chroma_h_shift;
466             h>>= s->chroma_v_shift;
467         }
468         s->plane[plane_index].width = w;
469         s->plane[plane_index].height= h;
470
471         for(level=s->spatial_decomposition_count-1; level>=0; level--){
472             for(orientation=level ? 1 : 0; orientation<4; orientation++){
473                 SubBand *b= &s->plane[plane_index].band[level][orientation];
474
475                 b->buf= s->spatial_dwt_buffer;
476                 b->level= level;
477                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
478                 b->width = (w + !(orientation&1))>>1;
479                 b->height= (h + !(orientation>1))>>1;
480
481                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
482                 b->buf_x_offset = 0;
483                 b->buf_y_offset = 0;
484
485                 if(orientation&1){
486                     b->buf += (w+1)>>1;
487                     b->buf_x_offset = (w+1)>>1;
488                 }
489                 if(orientation>1){
490                     b->buf += b->stride>>1;
491                     b->buf_y_offset = b->stride_line >> 1;
492                 }
493                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
494
495                 if(level)
496                     b->parent= &s->plane[plane_index].band[level-1][orientation];
497                 //FIXME avoid this realloc
498                 av_freep(&b->x_coeff);
499                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
500             }
501             w= (w+1)>>1;
502             h= (h+1)>>1;
503         }
504     }
505
506     return 0;
507 }
508
509 #define USE_HALFPEL_PLANE 0
510
511 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
512     int p,x,y;
513
514     for(p=0; p<3; p++){
515         int is_chroma= !!p;
516         int w= is_chroma ? s->avctx->width >>s->chroma_h_shift : s->avctx->width;
517         int h= is_chroma ? s->avctx->height>>s->chroma_v_shift : s->avctx->height;
518         int ls= frame->linesize[p];
519         uint8_t *src= frame->data[p];
520
521         halfpel[1][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
522         halfpel[2][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
523         halfpel[3][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
524
525         halfpel[0][p]= src;
526         for(y=0; y<h; y++){
527             for(x=0; x<w; x++){
528                 int i= y*ls + x;
529
530                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
531             }
532         }
533         for(y=0; y<h; y++){
534             for(x=0; x<w; x++){
535                 int i= y*ls + x;
536
537                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
538             }
539         }
540         src= halfpel[1][p];
541         for(y=0; y<h; y++){
542             for(x=0; x<w; x++){
543                 int i= y*ls + x;
544
545                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
546             }
547         }
548
549 //FIXME border!
550     }
551 }
552
553 void ff_snow_release_buffer(AVCodecContext *avctx)
554 {
555     SnowContext *s = avctx->priv_data;
556     int i;
557
558     if(s->last_picture[s->max_ref_frames-1].data[0]){
559         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
560         for(i=0; i<9; i++)
561             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
562                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
563     }
564 }
565
566 int ff_snow_frame_start(SnowContext *s){
567    AVFrame tmp;
568    int w= s->avctx->width; //FIXME round up to x16 ?
569    int h= s->avctx->height;
570
571     if (s->current_picture.data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
572         s->dsp.draw_edges(s->current_picture.data[0],
573                           s->current_picture.linesize[0], w   , h   ,
574                           EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
575         s->dsp.draw_edges(s->current_picture.data[1],
576                           s->current_picture.linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
577                           EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
578         s->dsp.draw_edges(s->current_picture.data[2],
579                           s->current_picture.linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
580                           EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
581     }
582
583     ff_snow_release_buffer(s->avctx);
584
585     tmp= s->last_picture[s->max_ref_frames-1];
586     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
587     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
588     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
589         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
590     s->last_picture[0]= s->current_picture;
591     s->current_picture= tmp;
592
593     if(s->keyframe){
594         s->ref_frames= 0;
595     }else{
596         int i;
597         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
598             if(i && s->last_picture[i-1].key_frame)
599                 break;
600         s->ref_frames= i;
601         if(s->ref_frames==0){
602             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
603             return -1;
604         }
605     }
606
607     s->current_picture.reference= 3;
608     if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
609         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
610         return -1;
611     }
612
613     s->current_picture.key_frame= s->keyframe;
614
615     return 0;
616 }
617
618 av_cold void ff_snow_common_end(SnowContext *s)
619 {
620     int plane_index, level, orientation, i;
621
622     av_freep(&s->spatial_dwt_buffer);
623     av_freep(&s->temp_dwt_buffer);
624     av_freep(&s->spatial_idwt_buffer);
625     av_freep(&s->temp_idwt_buffer);
626
627     s->m.me.temp= NULL;
628     av_freep(&s->m.me.scratchpad);
629     av_freep(&s->m.me.map);
630     av_freep(&s->m.me.score_map);
631     av_freep(&s->m.obmc_scratchpad);
632
633     av_freep(&s->block);
634     av_freep(&s->scratchbuf);
635
636     for(i=0; i<MAX_REF_FRAMES; i++){
637         av_freep(&s->ref_mvs[i]);
638         av_freep(&s->ref_scores[i]);
639         if(s->last_picture[i].data[0]) {
640             av_assert0(s->last_picture[i].data[0] != s->current_picture.data[0]);
641             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
642         }
643     }
644
645     for(plane_index=0; plane_index<3; plane_index++){
646         for(level=s->spatial_decomposition_count-1; level>=0; level--){
647             for(orientation=level ? 1 : 0; orientation<4; orientation++){
648                 SubBand *b= &s->plane[plane_index].band[level][orientation];
649
650                 av_freep(&b->x_coeff);
651             }
652         }
653     }
654     if (s->mconly_picture.data[0])
655         s->avctx->release_buffer(s->avctx, &s->mconly_picture);
656     if (s->current_picture.data[0])
657         s->avctx->release_buffer(s->avctx, &s->current_picture);
658 }