]> git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
rv40: don't always do the full prev_type search
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "dsputil.h"
26 #include "dwt.h"
27 #include "snow.h"
28 #include "snowdata.h"
29
30 #include "rangecoder.h"
31 #include "mathops.h"
32 #include "h263.h"
33
34 #undef NDEBUG
35 #include <assert.h>
36
37
38 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
39                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
40     int y, x;
41     IDWTELEM * dst;
42     for(y=0; y<b_h; y++){
43         //FIXME ugly misuse of obmc_stride
44         const uint8_t *obmc1= obmc + y*obmc_stride;
45         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
46         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
47         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
48         dst = slice_buffer_get_line(sb, src_y + y);
49         for(x=0; x<b_w; x++){
50             int v=   obmc1[x] * block[3][x + y*src_stride]
51                     +obmc2[x] * block[2][x + y*src_stride]
52                     +obmc3[x] * block[1][x + y*src_stride]
53                     +obmc4[x] * block[0][x + y*src_stride];
54
55             v <<= 8 - LOG2_OBMC_MAX;
56             if(FRAC_BITS != 8){
57                 v >>= 8 - FRAC_BITS;
58             }
59             if(add){
60                 v += dst[x + src_x];
61                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
62                 if(v&(~255)) v= ~(v>>31);
63                 dst8[x + y*src_stride] = v;
64             }else{
65                 dst[x + src_x] -= v;
66             }
67         }
68     }
69 }
70
71 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
72     int plane_index, level, orientation;
73
74     for(plane_index=0; plane_index<3; plane_index++){
75         for(level=0; level<MAX_DECOMPOSITIONS; level++){
76             for(orientation=level ? 1:0; orientation<4; orientation++){
77                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
78             }
79         }
80     }
81     memset(s->header_state, MID_STATE, sizeof(s->header_state));
82     memset(s->block_state, MID_STATE, sizeof(s->block_state));
83 }
84
85 int ff_snow_alloc_blocks(SnowContext *s){
86     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
87     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
88
89     s->b_width = w;
90     s->b_height= h;
91
92     av_free(s->block);
93     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
94     return 0;
95 }
96
97 static void init_qexp(void){
98     int i;
99     double v=128;
100
101     for(i=0; i<QROOT; i++){
102         ff_qexp[i]= lrintf(v);
103         v *= pow(2, 1.0 / QROOT);
104     }
105 }
106 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
107     static const uint8_t weight[64]={
108     8,7,6,5,4,3,2,1,
109     7,7,0,0,0,0,0,1,
110     6,0,6,0,0,0,2,0,
111     5,0,0,5,0,3,0,0,
112     4,0,0,0,4,0,0,0,
113     3,0,0,5,0,3,0,0,
114     2,0,6,0,0,0,2,0,
115     1,7,0,0,0,0,0,1,
116     };
117
118     static const uint8_t brane[256]={
119     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
120     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
121     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
122     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
123     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
124     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
125     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
126     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
127     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
128     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
129     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
130     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
131     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
132     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
133     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
134     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
135     };
136
137     static const uint8_t needs[16]={
138     0,1,0,0,
139     2,4,2,0,
140     0,1,0,0,
141     15
142     };
143
144     int x, y, b, r, l;
145     int16_t tmpIt   [64*(32+HTAPS_MAX)];
146     uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
147     int16_t *tmpI= tmpIt;
148     uint8_t *tmp2= tmp2t[0];
149     const uint8_t *hpel[11];
150     assert(dx<16 && dy<16);
151     r= brane[dx + 16*dy]&15;
152     l= brane[dx + 16*dy]>>4;
153
154     b= needs[l] | needs[r];
155     if(p && !p->diag_mc)
156         b= 15;
157
158     if(b&5){
159         for(y=0; y < b_h+HTAPS_MAX-1; y++){
160             for(x=0; x < b_w; x++){
161                 int a_1=src[x + HTAPS_MAX/2-4];
162                 int a0= src[x + HTAPS_MAX/2-3];
163                 int a1= src[x + HTAPS_MAX/2-2];
164                 int a2= src[x + HTAPS_MAX/2-1];
165                 int a3= src[x + HTAPS_MAX/2+0];
166                 int a4= src[x + HTAPS_MAX/2+1];
167                 int a5= src[x + HTAPS_MAX/2+2];
168                 int a6= src[x + HTAPS_MAX/2+3];
169                 int am=0;
170                 if(!p || p->fast_mc){
171                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
172                     tmpI[x]= am;
173                     am= (am+16)>>5;
174                 }else{
175                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
176                     tmpI[x]= am;
177                     am= (am+32)>>6;
178                 }
179
180                 if(am&(~255)) am= ~(am>>31);
181                 tmp2[x]= am;
182             }
183             tmpI+= 64;
184             tmp2+= stride;
185             src += stride;
186         }
187         src -= stride*y;
188     }
189     src += HTAPS_MAX/2 - 1;
190     tmp2= tmp2t[1];
191
192     if(b&2){
193         for(y=0; y < b_h; y++){
194             for(x=0; x < b_w+1; x++){
195                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
196                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
197                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
198                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
199                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
200                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
201                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
202                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
203                 int am=0;
204                 if(!p || p->fast_mc)
205                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
206                 else
207                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
208
209                 if(am&(~255)) am= ~(am>>31);
210                 tmp2[x]= am;
211             }
212             src += stride;
213             tmp2+= stride;
214         }
215         src -= stride*y;
216     }
217     src += stride*(HTAPS_MAX/2 - 1);
218     tmp2= tmp2t[2];
219     tmpI= tmpIt;
220     if(b&4){
221         for(y=0; y < b_h; y++){
222             for(x=0; x < b_w; x++){
223                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
224                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
225                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
226                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
227                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
228                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
229                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
230                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
231                 int am=0;
232                 if(!p || p->fast_mc)
233                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
234                 else
235                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
236                 if(am&(~255)) am= ~(am>>31);
237                 tmp2[x]= am;
238             }
239             tmpI+= 64;
240             tmp2+= stride;
241         }
242     }
243
244     hpel[ 0]= src;
245     hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
246     hpel[ 2]= src + 1;
247
248     hpel[ 4]= tmp2t[1];
249     hpel[ 5]= tmp2t[2];
250     hpel[ 6]= tmp2t[1] + 1;
251
252     hpel[ 8]= src + stride;
253     hpel[ 9]= hpel[1] + stride;
254     hpel[10]= hpel[8] + 1;
255
256     if(b==15){
257         const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
258         const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
259         const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
260         const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
261         dx&=7;
262         dy&=7;
263         for(y=0; y < b_h; y++){
264             for(x=0; x < b_w; x++){
265                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
266                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
267             }
268             src1+=stride;
269             src2+=stride;
270             src3+=stride;
271             src4+=stride;
272             dst +=stride;
273         }
274     }else{
275         const uint8_t *src1= hpel[l];
276         const uint8_t *src2= hpel[r];
277         int a= weight[((dx&7) + (8*(dy&7)))];
278         int b= 8-a;
279         for(y=0; y < b_h; y++){
280             for(x=0; x < b_w; x++){
281                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
282             }
283             src1+=stride;
284             src2+=stride;
285             dst +=stride;
286         }
287     }
288 }
289
290 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
291     if(block->type & BLOCK_INTRA){
292         int x, y;
293         const unsigned color  = block->color[plane_index];
294         const unsigned color4 = color*0x01010101;
295         if(b_w==32){
296             for(y=0; y < b_h; y++){
297                 *(uint32_t*)&dst[0 + y*stride]= color4;
298                 *(uint32_t*)&dst[4 + y*stride]= color4;
299                 *(uint32_t*)&dst[8 + y*stride]= color4;
300                 *(uint32_t*)&dst[12+ y*stride]= color4;
301                 *(uint32_t*)&dst[16+ y*stride]= color4;
302                 *(uint32_t*)&dst[20+ y*stride]= color4;
303                 *(uint32_t*)&dst[24+ y*stride]= color4;
304                 *(uint32_t*)&dst[28+ y*stride]= color4;
305             }
306         }else if(b_w==16){
307             for(y=0; y < b_h; y++){
308                 *(uint32_t*)&dst[0 + y*stride]= color4;
309                 *(uint32_t*)&dst[4 + y*stride]= color4;
310                 *(uint32_t*)&dst[8 + y*stride]= color4;
311                 *(uint32_t*)&dst[12+ y*stride]= color4;
312             }
313         }else if(b_w==8){
314             for(y=0; y < b_h; y++){
315                 *(uint32_t*)&dst[0 + y*stride]= color4;
316                 *(uint32_t*)&dst[4 + y*stride]= color4;
317             }
318         }else if(b_w==4){
319             for(y=0; y < b_h; y++){
320                 *(uint32_t*)&dst[0 + y*stride]= color4;
321             }
322         }else{
323             for(y=0; y < b_h; y++){
324                 for(x=0; x < b_w; x++){
325                     dst[x + y*stride]= color;
326                 }
327             }
328         }
329     }else{
330         uint8_t *src= s->last_picture[block->ref].data[plane_index];
331         const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
332         int mx= block->mx*scale;
333         int my= block->my*scale;
334         const int dx= mx&15;
335         const int dy= my&15;
336         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
337         sx += (mx>>4) - (HTAPS_MAX/2-1);
338         sy += (my>>4) - (HTAPS_MAX/2-1);
339         src += sx + sy*stride;
340         if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
341            || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
342             s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
343             src= tmp + MB_SIZE;
344         }
345 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
346 //        assert(!(b_w&(b_w-1)));
347         assert(b_w>1 && b_h>1);
348         assert((tab_index>=0 && tab_index<4) || b_w==32);
349         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
350             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
351         else if(b_w==32){
352             int y;
353             for(y=0; y<b_h; y+=16){
354                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
355                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
356             }
357         }else if(b_w==b_h)
358             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
359         else if(b_w==2*b_h){
360             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
361             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
362         }else{
363             assert(2*b_w==b_h);
364             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
365             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
366         }
367     }
368 }
369
370 #define mca(dx,dy,b_w)\
371 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
372     assert(h==b_w);\
373     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
374 }
375
376 mca( 0, 0,16)
377 mca( 8, 0,16)
378 mca( 0, 8,16)
379 mca( 8, 8,16)
380 mca( 0, 0,8)
381 mca( 8, 0,8)
382 mca( 0, 8,8)
383 mca( 8, 8,8)
384
385 av_cold int ff_snow_common_init(AVCodecContext *avctx){
386     SnowContext *s = avctx->priv_data;
387     int width, height;
388     int i, j;
389
390     s->avctx= avctx;
391     s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
392
393     ff_dsputil_init(&s->dsp, avctx);
394     ff_dwt_init(&s->dwt);
395
396 #define mcf(dx,dy)\
397     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
398     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
399         s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
400     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
401     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
402         s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
403
404     mcf( 0, 0)
405     mcf( 4, 0)
406     mcf( 8, 0)
407     mcf(12, 0)
408     mcf( 0, 4)
409     mcf( 4, 4)
410     mcf( 8, 4)
411     mcf(12, 4)
412     mcf( 0, 8)
413     mcf( 4, 8)
414     mcf( 8, 8)
415     mcf(12, 8)
416     mcf( 0,12)
417     mcf( 4,12)
418     mcf( 8,12)
419     mcf(12,12)
420
421 #define mcfh(dx,dy)\
422     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
423     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
424         mc_block_hpel ## dx ## dy ## 16;\
425     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
426     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
427         mc_block_hpel ## dx ## dy ## 8;
428
429     mcfh(0, 0)
430     mcfh(8, 0)
431     mcfh(0, 8)
432     mcfh(8, 8)
433
434     init_qexp();
435
436 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
437
438     width= s->avctx->width;
439     height= s->avctx->height;
440
441     s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
442     s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
443
444     for(i=0; i<MAX_REF_FRAMES; i++)
445         for(j=0; j<MAX_REF_FRAMES; j++)
446             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
447
448     s->avctx->get_buffer(s->avctx, &s->mconly_picture);
449     s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
450
451     return 0;
452 }
453
454 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
455     SnowContext *s = avctx->priv_data;
456     int plane_index, level, orientation;
457
458     for(plane_index=0; plane_index<3; plane_index++){
459         int w= s->avctx->width;
460         int h= s->avctx->height;
461
462         if(plane_index){
463             w>>= s->chroma_h_shift;
464             h>>= s->chroma_v_shift;
465         }
466         s->plane[plane_index].width = w;
467         s->plane[plane_index].height= h;
468
469         for(level=s->spatial_decomposition_count-1; level>=0; level--){
470             for(orientation=level ? 1 : 0; orientation<4; orientation++){
471                 SubBand *b= &s->plane[plane_index].band[level][orientation];
472
473                 b->buf= s->spatial_dwt_buffer;
474                 b->level= level;
475                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
476                 b->width = (w + !(orientation&1))>>1;
477                 b->height= (h + !(orientation>1))>>1;
478
479                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
480                 b->buf_x_offset = 0;
481                 b->buf_y_offset = 0;
482
483                 if(orientation&1){
484                     b->buf += (w+1)>>1;
485                     b->buf_x_offset = (w+1)>>1;
486                 }
487                 if(orientation>1){
488                     b->buf += b->stride>>1;
489                     b->buf_y_offset = b->stride_line >> 1;
490                 }
491                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
492
493                 if(level)
494                     b->parent= &s->plane[plane_index].band[level-1][orientation];
495                 //FIXME avoid this realloc
496                 av_freep(&b->x_coeff);
497                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
498             }
499             w= (w+1)>>1;
500             h= (h+1)>>1;
501         }
502     }
503
504     return 0;
505 }
506
507 #define USE_HALFPEL_PLANE 0
508
509 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
510     int p,x,y;
511
512     for(p=0; p<3; p++){
513         int is_chroma= !!p;
514         int w= s->avctx->width  >>is_chroma;
515         int h= s->avctx->height >>is_chroma;
516         int ls= frame->linesize[p];
517         uint8_t *src= frame->data[p];
518
519         halfpel[1][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
520         halfpel[2][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
521         halfpel[3][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
522
523         halfpel[0][p]= src;
524         for(y=0; y<h; y++){
525             for(x=0; x<w; x++){
526                 int i= y*ls + x;
527
528                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
529             }
530         }
531         for(y=0; y<h; y++){
532             for(x=0; x<w; x++){
533                 int i= y*ls + x;
534
535                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
536             }
537         }
538         src= halfpel[1][p];
539         for(y=0; y<h; y++){
540             for(x=0; x<w; x++){
541                 int i= y*ls + x;
542
543                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
544             }
545         }
546
547 //FIXME border!
548     }
549 }
550
551 void ff_snow_release_buffer(AVCodecContext *avctx)
552 {
553     SnowContext *s = avctx->priv_data;
554     int i;
555
556     if(s->last_picture[s->max_ref_frames-1].data[0]){
557         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
558         for(i=0; i<9; i++)
559             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
560                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
561     }
562 }
563
564 int ff_snow_frame_start(SnowContext *s){
565    AVFrame tmp;
566    int w= s->avctx->width; //FIXME round up to x16 ?
567    int h= s->avctx->height;
568
569     if (s->current_picture.data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
570         s->dsp.draw_edges(s->current_picture.data[0],
571                           s->current_picture.linesize[0], w   , h   ,
572                           EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
573         s->dsp.draw_edges(s->current_picture.data[1],
574                           s->current_picture.linesize[1], w>>1, h>>1,
575                           EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
576         s->dsp.draw_edges(s->current_picture.data[2],
577                           s->current_picture.linesize[2], w>>1, h>>1,
578                           EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
579     }
580
581     ff_snow_release_buffer(s->avctx);
582
583     tmp= s->last_picture[s->max_ref_frames-1];
584     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
585     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
586     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
587         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
588     s->last_picture[0]= s->current_picture;
589     s->current_picture= tmp;
590
591     if(s->keyframe){
592         s->ref_frames= 0;
593     }else{
594         int i;
595         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
596             if(i && s->last_picture[i-1].key_frame)
597                 break;
598         s->ref_frames= i;
599         if(s->ref_frames==0){
600             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
601             return -1;
602         }
603     }
604
605     s->current_picture.reference= 1;
606     if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
607         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
608         return -1;
609     }
610
611     s->current_picture.key_frame= s->keyframe;
612
613     return 0;
614 }
615
616 av_cold void ff_snow_common_end(SnowContext *s)
617 {
618     int plane_index, level, orientation, i;
619
620     av_freep(&s->spatial_dwt_buffer);
621     av_freep(&s->spatial_idwt_buffer);
622
623     s->m.me.temp= NULL;
624     av_freep(&s->m.me.scratchpad);
625     av_freep(&s->m.me.map);
626     av_freep(&s->m.me.score_map);
627     av_freep(&s->m.obmc_scratchpad);
628
629     av_freep(&s->block);
630     av_freep(&s->scratchbuf);
631
632     for(i=0; i<MAX_REF_FRAMES; i++){
633         av_freep(&s->ref_mvs[i]);
634         av_freep(&s->ref_scores[i]);
635         if(s->last_picture[i].data[0])
636             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
637     }
638
639     for(plane_index=0; plane_index<3; plane_index++){
640         for(level=s->spatial_decomposition_count-1; level>=0; level--){
641             for(orientation=level ? 1 : 0; orientation<4; orientation++){
642                 SubBand *b= &s->plane[plane_index].band[level][orientation];
643
644                 av_freep(&b->x_coeff);
645             }
646         }
647     }
648     if (s->mconly_picture.data[0])
649         s->avctx->release_buffer(s->avctx, &s->mconly_picture);
650     if (s->current_picture.data[0])
651         s->avctx->release_buffer(s->avctx, &s->current_picture);
652 }