]> git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
vc1: signal interlaced and tff flag to the consumer
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "dsputil.h"
26 #include "dwt.h"
27 #include "snow.h"
28 #include "snowdata.h"
29
30 #include "rangecoder.h"
31 #include "mathops.h"
32 #include "h263.h"
33
34 #undef NDEBUG
35 #include <assert.h>
36
37
38 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
39                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
40     int y, x;
41     IDWTELEM * dst;
42     for(y=0; y<b_h; y++){
43         //FIXME ugly misuse of obmc_stride
44         const uint8_t *obmc1= obmc + y*obmc_stride;
45         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
46         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
47         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
48         dst = slice_buffer_get_line(sb, src_y + y);
49         for(x=0; x<b_w; x++){
50             int v=   obmc1[x] * block[3][x + y*src_stride]
51                     +obmc2[x] * block[2][x + y*src_stride]
52                     +obmc3[x] * block[1][x + y*src_stride]
53                     +obmc4[x] * block[0][x + y*src_stride];
54
55             v <<= 8 - LOG2_OBMC_MAX;
56             if(FRAC_BITS != 8){
57                 v >>= 8 - FRAC_BITS;
58             }
59             if(add){
60                 v += dst[x + src_x];
61                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
62                 if(v&(~255)) v= ~(v>>31);
63                 dst8[x + y*src_stride] = v;
64             }else{
65                 dst[x + src_x] -= v;
66             }
67         }
68     }
69 }
70
71 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
72     int plane_index, level, orientation;
73
74     for(plane_index=0; plane_index<3; plane_index++){
75         for(level=0; level<MAX_DECOMPOSITIONS; level++){
76             for(orientation=level ? 1:0; orientation<4; orientation++){
77                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
78             }
79         }
80     }
81     memset(s->header_state, MID_STATE, sizeof(s->header_state));
82     memset(s->block_state, MID_STATE, sizeof(s->block_state));
83 }
84
85 int ff_snow_alloc_blocks(SnowContext *s){
86     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
87     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
88
89     s->b_width = w;
90     s->b_height= h;
91
92     av_free(s->block);
93     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
94     return 0;
95 }
96
97 static void init_qexp(void){
98     int i;
99     double v=128;
100
101     for(i=0; i<QROOT; i++){
102         ff_qexp[i]= lrintf(v);
103         v *= pow(2, 1.0 / QROOT);
104     }
105 }
106 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
107     static const uint8_t weight[64]={
108     8,7,6,5,4,3,2,1,
109     7,7,0,0,0,0,0,1,
110     6,0,6,0,0,0,2,0,
111     5,0,0,5,0,3,0,0,
112     4,0,0,0,4,0,0,0,
113     3,0,0,5,0,3,0,0,
114     2,0,6,0,0,0,2,0,
115     1,7,0,0,0,0,0,1,
116     };
117
118     static const uint8_t brane[256]={
119     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
120     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
121     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
122     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
123     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
124     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
125     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
126     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
127     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
128     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
129     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
130     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
131     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
132     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
133     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
134     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
135     };
136
137     static const uint8_t needs[16]={
138     0,1,0,0,
139     2,4,2,0,
140     0,1,0,0,
141     15
142     };
143
144     int x, y, b, r, l;
145     int16_t tmpIt   [64*(32+HTAPS_MAX)];
146     uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
147     int16_t *tmpI= tmpIt;
148     uint8_t *tmp2= tmp2t[0];
149     const uint8_t *hpel[11];
150     assert(dx<16 && dy<16);
151     r= brane[dx + 16*dy]&15;
152     l= brane[dx + 16*dy]>>4;
153
154     b= needs[l] | needs[r];
155     if(p && !p->diag_mc)
156         b= 15;
157
158     if(b&5){
159         for(y=0; y < b_h+HTAPS_MAX-1; y++){
160             for(x=0; x < b_w; x++){
161                 int a_1=src[x + HTAPS_MAX/2-4];
162                 int a0= src[x + HTAPS_MAX/2-3];
163                 int a1= src[x + HTAPS_MAX/2-2];
164                 int a2= src[x + HTAPS_MAX/2-1];
165                 int a3= src[x + HTAPS_MAX/2+0];
166                 int a4= src[x + HTAPS_MAX/2+1];
167                 int a5= src[x + HTAPS_MAX/2+2];
168                 int a6= src[x + HTAPS_MAX/2+3];
169                 int am=0;
170                 if(!p || p->fast_mc){
171                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
172                     tmpI[x]= am;
173                     am= (am+16)>>5;
174                 }else{
175                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
176                     tmpI[x]= am;
177                     am= (am+32)>>6;
178                 }
179
180                 if(am&(~255)) am= ~(am>>31);
181                 tmp2[x]= am;
182             }
183             tmpI+= 64;
184             tmp2+= stride;
185             src += stride;
186         }
187         src -= stride*y;
188     }
189     src += HTAPS_MAX/2 - 1;
190     tmp2= tmp2t[1];
191
192     if(b&2){
193         for(y=0; y < b_h; y++){
194             for(x=0; x < b_w+1; x++){
195                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
196                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
197                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
198                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
199                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
200                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
201                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
202                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
203                 int am=0;
204                 if(!p || p->fast_mc)
205                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
206                 else
207                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
208
209                 if(am&(~255)) am= ~(am>>31);
210                 tmp2[x]= am;
211             }
212             src += stride;
213             tmp2+= stride;
214         }
215         src -= stride*y;
216     }
217     src += stride*(HTAPS_MAX/2 - 1);
218     tmp2= tmp2t[2];
219     tmpI= tmpIt;
220     if(b&4){
221         for(y=0; y < b_h; y++){
222             for(x=0; x < b_w; x++){
223                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
224                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
225                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
226                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
227                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
228                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
229                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
230                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
231                 int am=0;
232                 if(!p || p->fast_mc)
233                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
234                 else
235                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
236                 if(am&(~255)) am= ~(am>>31);
237                 tmp2[x]= am;
238             }
239             tmpI+= 64;
240             tmp2+= stride;
241         }
242     }
243
244     hpel[ 0]= src;
245     hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
246     hpel[ 2]= src + 1;
247
248     hpel[ 4]= tmp2t[1];
249     hpel[ 5]= tmp2t[2];
250     hpel[ 6]= tmp2t[1] + 1;
251
252     hpel[ 8]= src + stride;
253     hpel[ 9]= hpel[1] + stride;
254     hpel[10]= hpel[8] + 1;
255
256     if(b==15){
257         const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
258         const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
259         const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
260         const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
261         dx&=7;
262         dy&=7;
263         for(y=0; y < b_h; y++){
264             for(x=0; x < b_w; x++){
265                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
266                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
267             }
268             src1+=stride;
269             src2+=stride;
270             src3+=stride;
271             src4+=stride;
272             dst +=stride;
273         }
274     }else{
275         const uint8_t *src1= hpel[l];
276         const uint8_t *src2= hpel[r];
277         int a= weight[((dx&7) + (8*(dy&7)))];
278         int b= 8-a;
279         for(y=0; y < b_h; y++){
280             for(x=0; x < b_w; x++){
281                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
282             }
283             src1+=stride;
284             src2+=stride;
285             dst +=stride;
286         }
287     }
288 }
289
290 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
291     if(block->type & BLOCK_INTRA){
292         int x, y;
293         const unsigned color  = block->color[plane_index];
294         const unsigned color4 = color*0x01010101;
295         if(b_w==32){
296             for(y=0; y < b_h; y++){
297                 *(uint32_t*)&dst[0 + y*stride]= color4;
298                 *(uint32_t*)&dst[4 + y*stride]= color4;
299                 *(uint32_t*)&dst[8 + y*stride]= color4;
300                 *(uint32_t*)&dst[12+ y*stride]= color4;
301                 *(uint32_t*)&dst[16+ y*stride]= color4;
302                 *(uint32_t*)&dst[20+ y*stride]= color4;
303                 *(uint32_t*)&dst[24+ y*stride]= color4;
304                 *(uint32_t*)&dst[28+ y*stride]= color4;
305             }
306         }else if(b_w==16){
307             for(y=0; y < b_h; y++){
308                 *(uint32_t*)&dst[0 + y*stride]= color4;
309                 *(uint32_t*)&dst[4 + y*stride]= color4;
310                 *(uint32_t*)&dst[8 + y*stride]= color4;
311                 *(uint32_t*)&dst[12+ y*stride]= color4;
312             }
313         }else if(b_w==8){
314             for(y=0; y < b_h; y++){
315                 *(uint32_t*)&dst[0 + y*stride]= color4;
316                 *(uint32_t*)&dst[4 + y*stride]= color4;
317             }
318         }else if(b_w==4){
319             for(y=0; y < b_h; y++){
320                 *(uint32_t*)&dst[0 + y*stride]= color4;
321             }
322         }else{
323             for(y=0; y < b_h; y++){
324                 for(x=0; x < b_w; x++){
325                     dst[x + y*stride]= color;
326                 }
327             }
328         }
329     }else{
330         uint8_t *src= s->last_picture[block->ref].data[plane_index];
331         const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
332         int mx= block->mx*scale;
333         int my= block->my*scale;
334         const int dx= mx&15;
335         const int dy= my&15;
336         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
337         sx += (mx>>4) - (HTAPS_MAX/2-1);
338         sy += (my>>4) - (HTAPS_MAX/2-1);
339         src += sx + sy*stride;
340         if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
341            || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
342             s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
343             src= tmp + MB_SIZE;
344         }
345 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
346 //        assert(!(b_w&(b_w-1)));
347         assert(b_w>1 && b_h>1);
348         assert((tab_index>=0 && tab_index<4) || b_w==32);
349         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
350             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
351         else if(b_w==32){
352             int y;
353             for(y=0; y<b_h; y+=16){
354                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
355                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
356             }
357         }else if(b_w==b_h)
358             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
359         else if(b_w==2*b_h){
360             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
361             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
362         }else{
363             assert(2*b_w==b_h);
364             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
365             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
366         }
367     }
368 }
369
370 #define mca(dx,dy,b_w)\
371 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
372     assert(h==b_w);\
373     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
374 }
375
376 mca( 0, 0,16)
377 mca( 8, 0,16)
378 mca( 0, 8,16)
379 mca( 8, 8,16)
380 mca( 0, 0,8)
381 mca( 8, 0,8)
382 mca( 0, 8,8)
383 mca( 8, 8,8)
384
385 av_cold int ff_snow_common_init(AVCodecContext *avctx){
386     SnowContext *s = avctx->priv_data;
387     int width, height;
388     int i, j;
389
390     s->avctx= avctx;
391     s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
392
393     ff_dsputil_init(&s->dsp, avctx);
394     ff_dwt_init(&s->dwt);
395
396 #define mcf(dx,dy)\
397     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
398     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
399         s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
400     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
401     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
402         s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
403
404     mcf( 0, 0)
405     mcf( 4, 0)
406     mcf( 8, 0)
407     mcf(12, 0)
408     mcf( 0, 4)
409     mcf( 4, 4)
410     mcf( 8, 4)
411     mcf(12, 4)
412     mcf( 0, 8)
413     mcf( 4, 8)
414     mcf( 8, 8)
415     mcf(12, 8)
416     mcf( 0,12)
417     mcf( 4,12)
418     mcf( 8,12)
419     mcf(12,12)
420
421 #define mcfh(dx,dy)\
422     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
423     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
424         mc_block_hpel ## dx ## dy ## 16;\
425     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
426     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
427         mc_block_hpel ## dx ## dy ## 8;
428
429     mcfh(0, 0)
430     mcfh(8, 0)
431     mcfh(0, 8)
432     mcfh(8, 8)
433
434     init_qexp();
435
436 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
437
438     width= s->avctx->width;
439     height= s->avctx->height;
440
441     s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
442     s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
443     s->temp_dwt_buffer = av_mallocz(width * sizeof(DWTELEM));
444     s->temp_idwt_buffer = av_mallocz(width * sizeof(IDWTELEM));
445
446     for(i=0; i<MAX_REF_FRAMES; i++)
447         for(j=0; j<MAX_REF_FRAMES; j++)
448             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
449
450     s->avctx->get_buffer(s->avctx, &s->mconly_picture);
451     s->scratchbuf = av_mallocz(s->mconly_picture.linesize[0]*7*MB_SIZE);
452
453     return 0;
454 }
455
456 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
457     SnowContext *s = avctx->priv_data;
458     int plane_index, level, orientation;
459
460     for(plane_index=0; plane_index<3; plane_index++){
461         int w= s->avctx->width;
462         int h= s->avctx->height;
463
464         if(plane_index){
465             w>>= s->chroma_h_shift;
466             h>>= s->chroma_v_shift;
467         }
468         s->plane[plane_index].width = w;
469         s->plane[plane_index].height= h;
470
471         for(level=s->spatial_decomposition_count-1; level>=0; level--){
472             for(orientation=level ? 1 : 0; orientation<4; orientation++){
473                 SubBand *b= &s->plane[plane_index].band[level][orientation];
474
475                 b->buf= s->spatial_dwt_buffer;
476                 b->level= level;
477                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
478                 b->width = (w + !(orientation&1))>>1;
479                 b->height= (h + !(orientation>1))>>1;
480
481                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
482                 b->buf_x_offset = 0;
483                 b->buf_y_offset = 0;
484
485                 if(orientation&1){
486                     b->buf += (w+1)>>1;
487                     b->buf_x_offset = (w+1)>>1;
488                 }
489                 if(orientation>1){
490                     b->buf += b->stride>>1;
491                     b->buf_y_offset = b->stride_line >> 1;
492                 }
493                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
494
495                 if(level)
496                     b->parent= &s->plane[plane_index].band[level-1][orientation];
497                 //FIXME avoid this realloc
498                 av_freep(&b->x_coeff);
499                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
500             }
501             w= (w+1)>>1;
502             h= (h+1)>>1;
503         }
504     }
505
506     return 0;
507 }
508
509 #define USE_HALFPEL_PLANE 0
510
511 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
512     int p,x,y;
513
514     for(p=0; p<3; p++){
515         int is_chroma= !!p;
516         int w= s->avctx->width  >>is_chroma;
517         int h= s->avctx->height >>is_chroma;
518         int ls= frame->linesize[p];
519         uint8_t *src= frame->data[p];
520
521         halfpel[1][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
522         halfpel[2][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
523         halfpel[3][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
524
525         halfpel[0][p]= src;
526         for(y=0; y<h; y++){
527             for(x=0; x<w; x++){
528                 int i= y*ls + x;
529
530                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
531             }
532         }
533         for(y=0; y<h; y++){
534             for(x=0; x<w; x++){
535                 int i= y*ls + x;
536
537                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
538             }
539         }
540         src= halfpel[1][p];
541         for(y=0; y<h; y++){
542             for(x=0; x<w; x++){
543                 int i= y*ls + x;
544
545                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
546             }
547         }
548
549 //FIXME border!
550     }
551 }
552
553 void ff_snow_release_buffer(AVCodecContext *avctx)
554 {
555     SnowContext *s = avctx->priv_data;
556     int i;
557
558     if(s->last_picture[s->max_ref_frames-1].data[0]){
559         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
560         for(i=0; i<9; i++)
561             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
562                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
563     }
564 }
565
566 int ff_snow_frame_start(SnowContext *s){
567    AVFrame tmp;
568    int w= s->avctx->width; //FIXME round up to x16 ?
569    int h= s->avctx->height;
570
571     if (s->current_picture.data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
572         s->dsp.draw_edges(s->current_picture.data[0],
573                           s->current_picture.linesize[0], w   , h   ,
574                           EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
575         s->dsp.draw_edges(s->current_picture.data[1],
576                           s->current_picture.linesize[1], w>>1, h>>1,
577                           EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
578         s->dsp.draw_edges(s->current_picture.data[2],
579                           s->current_picture.linesize[2], w>>1, h>>1,
580                           EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
581     }
582
583     ff_snow_release_buffer(s->avctx);
584
585     tmp= s->last_picture[s->max_ref_frames-1];
586     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
587     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
588     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
589         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
590     s->last_picture[0]= s->current_picture;
591     s->current_picture= tmp;
592
593     if(s->keyframe){
594         s->ref_frames= 0;
595     }else{
596         int i;
597         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
598             if(i && s->last_picture[i-1].key_frame)
599                 break;
600         s->ref_frames= i;
601         if(s->ref_frames==0){
602             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
603             return -1;
604         }
605     }
606
607     s->current_picture.reference= 3;
608     if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
609         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
610         return -1;
611     }
612
613     s->current_picture.key_frame= s->keyframe;
614
615     return 0;
616 }
617
618 av_cold void ff_snow_common_end(SnowContext *s)
619 {
620     int plane_index, level, orientation, i;
621
622     av_freep(&s->spatial_dwt_buffer);
623     av_freep(&s->temp_dwt_buffer);
624     av_freep(&s->spatial_idwt_buffer);
625     av_freep(&s->temp_idwt_buffer);
626
627     s->m.me.temp= NULL;
628     av_freep(&s->m.me.scratchpad);
629     av_freep(&s->m.me.map);
630     av_freep(&s->m.me.score_map);
631     av_freep(&s->m.obmc_scratchpad);
632
633     av_freep(&s->block);
634     av_freep(&s->scratchbuf);
635
636     for(i=0; i<MAX_REF_FRAMES; i++){
637         av_freep(&s->ref_mvs[i]);
638         av_freep(&s->ref_scores[i]);
639         if(s->last_picture[i].data[0])
640             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
641     }
642
643     for(plane_index=0; plane_index<3; plane_index++){
644         for(level=s->spatial_decomposition_count-1; level>=0; level--){
645             for(orientation=level ? 1 : 0; orientation<4; orientation++){
646                 SubBand *b= &s->plane[plane_index].band[level][orientation];
647
648                 av_freep(&b->x_coeff);
649             }
650         }
651     }
652     if (s->mconly_picture.data[0])
653         s->avctx->release_buffer(s->avctx, &s->mconly_picture);
654     if (s->current_picture.data[0])
655         s->avctx->release_buffer(s->avctx, &s->current_picture);
656 }