]> git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
libilbc: set channel layout
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "dsputil.h"
26 #include "dwt.h"
27 #include "snow.h"
28 #include "snowdata.h"
29
30 #include "rangecoder.h"
31 #include "mathops.h"
32 #include "h263.h"
33
34 #undef NDEBUG
35 #include <assert.h>
36
37
38 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
39                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
40     int y, x;
41     IDWTELEM * dst;
42     for(y=0; y<b_h; y++){
43         //FIXME ugly misuse of obmc_stride
44         const uint8_t *obmc1= obmc + y*obmc_stride;
45         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
46         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
47         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
48         dst = slice_buffer_get_line(sb, src_y + y);
49         for(x=0; x<b_w; x++){
50             int v=   obmc1[x] * block[3][x + y*src_stride]
51                     +obmc2[x] * block[2][x + y*src_stride]
52                     +obmc3[x] * block[1][x + y*src_stride]
53                     +obmc4[x] * block[0][x + y*src_stride];
54
55             v <<= 8 - LOG2_OBMC_MAX;
56             if(FRAC_BITS != 8){
57                 v >>= 8 - FRAC_BITS;
58             }
59             if(add){
60                 v += dst[x + src_x];
61                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
62                 if(v&(~255)) v= ~(v>>31);
63                 dst8[x + y*src_stride] = v;
64             }else{
65                 dst[x + src_x] -= v;
66             }
67         }
68     }
69 }
70
71 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
72     int plane_index, level, orientation;
73
74     for(plane_index=0; plane_index<3; plane_index++){
75         for(level=0; level<MAX_DECOMPOSITIONS; level++){
76             for(orientation=level ? 1:0; orientation<4; orientation++){
77                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
78             }
79         }
80     }
81     memset(s->header_state, MID_STATE, sizeof(s->header_state));
82     memset(s->block_state, MID_STATE, sizeof(s->block_state));
83 }
84
85 int ff_snow_alloc_blocks(SnowContext *s){
86     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
87     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
88
89     s->b_width = w;
90     s->b_height= h;
91
92     av_free(s->block);
93     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
94     return 0;
95 }
96
97 static void init_qexp(void){
98     int i;
99     double v=128;
100
101     for(i=0; i<QROOT; i++){
102         ff_qexp[i]= lrintf(v);
103         v *= pow(2, 1.0 / QROOT);
104     }
105 }
106 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
107     static const uint8_t weight[64]={
108     8,7,6,5,4,3,2,1,
109     7,7,0,0,0,0,0,1,
110     6,0,6,0,0,0,2,0,
111     5,0,0,5,0,3,0,0,
112     4,0,0,0,4,0,0,0,
113     3,0,0,5,0,3,0,0,
114     2,0,6,0,0,0,2,0,
115     1,7,0,0,0,0,0,1,
116     };
117
118     static const uint8_t brane[256]={
119     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
120     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
121     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
122     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
123     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
124     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
125     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
126     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
127     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
128     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
129     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
130     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
131     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
132     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
133     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
134     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
135     };
136
137     static const uint8_t needs[16]={
138     0,1,0,0,
139     2,4,2,0,
140     0,1,0,0,
141     15
142     };
143
144     int x, y, b, r, l;
145     int16_t tmpIt   [64*(32+HTAPS_MAX)];
146     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
147     int16_t *tmpI= tmpIt;
148     uint8_t *tmp2= tmp2t[0];
149     const uint8_t *hpel[11];
150     assert(dx<16 && dy<16);
151     r= brane[dx + 16*dy]&15;
152     l= brane[dx + 16*dy]>>4;
153
154     b= needs[l] | needs[r];
155     if(p && !p->diag_mc)
156         b= 15;
157
158     if(b&5){
159         for(y=0; y < b_h+HTAPS_MAX-1; y++){
160             for(x=0; x < b_w; x++){
161                 int a_1=src[x + HTAPS_MAX/2-4];
162                 int a0= src[x + HTAPS_MAX/2-3];
163                 int a1= src[x + HTAPS_MAX/2-2];
164                 int a2= src[x + HTAPS_MAX/2-1];
165                 int a3= src[x + HTAPS_MAX/2+0];
166                 int a4= src[x + HTAPS_MAX/2+1];
167                 int a5= src[x + HTAPS_MAX/2+2];
168                 int a6= src[x + HTAPS_MAX/2+3];
169                 int am=0;
170                 if(!p || p->fast_mc){
171                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
172                     tmpI[x]= am;
173                     am= (am+16)>>5;
174                 }else{
175                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
176                     tmpI[x]= am;
177                     am= (am+32)>>6;
178                 }
179
180                 if(am&(~255)) am= ~(am>>31);
181                 tmp2[x]= am;
182             }
183             tmpI+= 64;
184             tmp2+= 64;
185             src += stride;
186         }
187         src -= stride*y;
188     }
189     src += HTAPS_MAX/2 - 1;
190     tmp2= tmp2t[1];
191
192     if(b&2){
193         for(y=0; y < b_h; y++){
194             for(x=0; x < b_w+1; x++){
195                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
196                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
197                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
198                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
199                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
200                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
201                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
202                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
203                 int am=0;
204                 if(!p || p->fast_mc)
205                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
206                 else
207                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
208
209                 if(am&(~255)) am= ~(am>>31);
210                 tmp2[x]= am;
211             }
212             src += stride;
213             tmp2+= 64;
214         }
215         src -= stride*y;
216     }
217     src += stride*(HTAPS_MAX/2 - 1);
218     tmp2= tmp2t[2];
219     tmpI= tmpIt;
220     if(b&4){
221         for(y=0; y < b_h; y++){
222             for(x=0; x < b_w; x++){
223                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
224                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
225                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
226                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
227                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
228                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
229                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
230                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
231                 int am=0;
232                 if(!p || p->fast_mc)
233                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
234                 else
235                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
236                 if(am&(~255)) am= ~(am>>31);
237                 tmp2[x]= am;
238             }
239             tmpI+= 64;
240             tmp2+= 64;
241         }
242     }
243
244     hpel[ 0]= src;
245     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
246     hpel[ 2]= src + 1;
247
248     hpel[ 4]= tmp2t[1];
249     hpel[ 5]= tmp2t[2];
250     hpel[ 6]= tmp2t[1] + 1;
251
252     hpel[ 8]= src + stride;
253     hpel[ 9]= hpel[1] + 64;
254     hpel[10]= hpel[8] + 1;
255
256 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
257
258     if(b==15){
259         int dxy = dx / 8 + dy / 8 * 4;
260         const uint8_t *src1 = hpel[dxy    ];
261         const uint8_t *src2 = hpel[dxy + 1];
262         const uint8_t *src3 = hpel[dxy + 4];
263         const uint8_t *src4 = hpel[dxy + 5];
264         int stride1 = MC_STRIDE(dxy);
265         int stride2 = MC_STRIDE(dxy + 1);
266         int stride3 = MC_STRIDE(dxy + 4);
267         int stride4 = MC_STRIDE(dxy + 5);
268         dx&=7;
269         dy&=7;
270         for(y=0; y < b_h; y++){
271             for(x=0; x < b_w; x++){
272                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
273                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
274             }
275             src1+=stride1;
276             src2+=stride2;
277             src3+=stride3;
278             src4+=stride4;
279             dst +=stride;
280         }
281     }else{
282         const uint8_t *src1= hpel[l];
283         const uint8_t *src2= hpel[r];
284         int stride1 = MC_STRIDE(l);
285         int stride2 = MC_STRIDE(r);
286         int a= weight[((dx&7) + (8*(dy&7)))];
287         int b= 8-a;
288         for(y=0; y < b_h; y++){
289             for(x=0; x < b_w; x++){
290                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
291             }
292             src1+=stride1;
293             src2+=stride2;
294             dst +=stride;
295         }
296     }
297 }
298
299 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
300     if(block->type & BLOCK_INTRA){
301         int x, y;
302         const unsigned color  = block->color[plane_index];
303         const unsigned color4 = color*0x01010101;
304         if(b_w==32){
305             for(y=0; y < b_h; y++){
306                 *(uint32_t*)&dst[0 + y*stride]= color4;
307                 *(uint32_t*)&dst[4 + y*stride]= color4;
308                 *(uint32_t*)&dst[8 + y*stride]= color4;
309                 *(uint32_t*)&dst[12+ y*stride]= color4;
310                 *(uint32_t*)&dst[16+ y*stride]= color4;
311                 *(uint32_t*)&dst[20+ y*stride]= color4;
312                 *(uint32_t*)&dst[24+ y*stride]= color4;
313                 *(uint32_t*)&dst[28+ y*stride]= color4;
314             }
315         }else if(b_w==16){
316             for(y=0; y < b_h; y++){
317                 *(uint32_t*)&dst[0 + y*stride]= color4;
318                 *(uint32_t*)&dst[4 + y*stride]= color4;
319                 *(uint32_t*)&dst[8 + y*stride]= color4;
320                 *(uint32_t*)&dst[12+ y*stride]= color4;
321             }
322         }else if(b_w==8){
323             for(y=0; y < b_h; y++){
324                 *(uint32_t*)&dst[0 + y*stride]= color4;
325                 *(uint32_t*)&dst[4 + y*stride]= color4;
326             }
327         }else if(b_w==4){
328             for(y=0; y < b_h; y++){
329                 *(uint32_t*)&dst[0 + y*stride]= color4;
330             }
331         }else{
332             for(y=0; y < b_h; y++){
333                 for(x=0; x < b_w; x++){
334                     dst[x + y*stride]= color;
335                 }
336             }
337         }
338     }else{
339         uint8_t *src= s->last_picture[block->ref].data[plane_index];
340         const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
341         int mx= block->mx*scale;
342         int my= block->my*scale;
343         const int dx= mx&15;
344         const int dy= my&15;
345         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
346         sx += (mx>>4) - (HTAPS_MAX/2-1);
347         sy += (my>>4) - (HTAPS_MAX/2-1);
348         src += sx + sy*stride;
349         if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
350            || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
351             s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
352             src= tmp + MB_SIZE;
353         }
354 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
355 //        assert(!(b_w&(b_w-1)));
356         assert(b_w>1 && b_h>1);
357         assert((tab_index>=0 && tab_index<4) || b_w==32);
358         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
359             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
360         else if(b_w==32){
361             int y;
362             for(y=0; y<b_h; y+=16){
363                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
364                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
365             }
366         }else if(b_w==b_h)
367             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
368         else if(b_w==2*b_h){
369             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
370             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
371         }else{
372             assert(2*b_w==b_h);
373             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
374             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
375         }
376     }
377 }
378
379 #define mca(dx,dy,b_w)\
380 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
381     assert(h==b_w);\
382     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
383 }
384
385 mca( 0, 0,16)
386 mca( 8, 0,16)
387 mca( 0, 8,16)
388 mca( 8, 8,16)
389 mca( 0, 0,8)
390 mca( 8, 0,8)
391 mca( 0, 8,8)
392 mca( 8, 8,8)
393
394 av_cold int ff_snow_common_init(AVCodecContext *avctx){
395     SnowContext *s = avctx->priv_data;
396     int width, height;
397     int i, j, ret;
398     int emu_buf_size;
399
400     s->avctx= avctx;
401     s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
402
403     ff_dsputil_init(&s->dsp, avctx);
404     ff_dwt_init(&s->dwt);
405
406 #define mcf(dx,dy)\
407     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
408     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
409         s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
410     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
411     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
412         s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
413
414     mcf( 0, 0)
415     mcf( 4, 0)
416     mcf( 8, 0)
417     mcf(12, 0)
418     mcf( 0, 4)
419     mcf( 4, 4)
420     mcf( 8, 4)
421     mcf(12, 4)
422     mcf( 0, 8)
423     mcf( 4, 8)
424     mcf( 8, 8)
425     mcf(12, 8)
426     mcf( 0,12)
427     mcf( 4,12)
428     mcf( 8,12)
429     mcf(12,12)
430
431 #define mcfh(dx,dy)\
432     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
433     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
434         mc_block_hpel ## dx ## dy ## 16;\
435     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
436     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
437         mc_block_hpel ## dx ## dy ## 8;
438
439     mcfh(0, 0)
440     mcfh(8, 0)
441     mcfh(0, 8)
442     mcfh(8, 8)
443
444     init_qexp();
445
446 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
447
448     width= s->avctx->width;
449     height= s->avctx->height;
450
451     FF_ALLOCZ_OR_GOTO(avctx, s->spatial_idwt_buffer, width * height * sizeof(IDWTELEM), fail);
452     FF_ALLOCZ_OR_GOTO(avctx, s->spatial_dwt_buffer,  width * height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
453     FF_ALLOCZ_OR_GOTO(avctx, s->temp_dwt_buffer,     width * sizeof(DWTELEM),  fail);
454     FF_ALLOCZ_OR_GOTO(avctx, s->temp_idwt_buffer,    width * sizeof(IDWTELEM), fail);
455     FF_ALLOC_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1) * ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
456
457     for(i=0; i<MAX_REF_FRAMES; i++)
458         for(j=0; j<MAX_REF_FRAMES; j++)
459             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
460
461     if ((ret = s->avctx->get_buffer(s->avctx, &s->mconly_picture)) < 0) {
462         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
463         return ret;
464     }
465     FF_ALLOC_OR_GOTO(avctx, s->scratchbuf, s->mconly_picture.linesize[0]*7*MB_SIZE, fail);
466     emu_buf_size = s->mconly_picture.linesize[0] * (2 * MB_SIZE + HTAPS_MAX - 1);
467     FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
468
469     return 0;
470 fail:
471     return AVERROR(ENOMEM);
472 }
473
474 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
475     SnowContext *s = avctx->priv_data;
476     int plane_index, level, orientation;
477
478     for(plane_index=0; plane_index<3; plane_index++){
479         int w= s->avctx->width;
480         int h= s->avctx->height;
481
482         if(plane_index){
483             w>>= s->chroma_h_shift;
484             h>>= s->chroma_v_shift;
485         }
486         s->plane[plane_index].width = w;
487         s->plane[plane_index].height= h;
488
489         for(level=s->spatial_decomposition_count-1; level>=0; level--){
490             for(orientation=level ? 1 : 0; orientation<4; orientation++){
491                 SubBand *b= &s->plane[plane_index].band[level][orientation];
492
493                 b->buf= s->spatial_dwt_buffer;
494                 b->level= level;
495                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
496                 b->width = (w + !(orientation&1))>>1;
497                 b->height= (h + !(orientation>1))>>1;
498
499                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
500                 b->buf_x_offset = 0;
501                 b->buf_y_offset = 0;
502
503                 if(orientation&1){
504                     b->buf += (w+1)>>1;
505                     b->buf_x_offset = (w+1)>>1;
506                 }
507                 if(orientation>1){
508                     b->buf += b->stride>>1;
509                     b->buf_y_offset = b->stride_line >> 1;
510                 }
511                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
512
513                 if(level)
514                     b->parent= &s->plane[plane_index].band[level-1][orientation];
515                 //FIXME avoid this realloc
516                 av_freep(&b->x_coeff);
517                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
518             }
519             w= (w+1)>>1;
520             h= (h+1)>>1;
521         }
522     }
523
524     return 0;
525 }
526
527 #define USE_HALFPEL_PLANE 0
528
529 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
530     int p,x,y;
531
532     for(p=0; p<3; p++){
533         int is_chroma= !!p;
534         int w= s->avctx->width  >>is_chroma;
535         int h= s->avctx->height >>is_chroma;
536         int ls= frame->linesize[p];
537         uint8_t *src= frame->data[p];
538
539         halfpel[1][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
540         halfpel[2][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
541         halfpel[3][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
542
543         halfpel[0][p]= src;
544         for(y=0; y<h; y++){
545             for(x=0; x<w; x++){
546                 int i= y*ls + x;
547
548                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
549             }
550         }
551         for(y=0; y<h; y++){
552             for(x=0; x<w; x++){
553                 int i= y*ls + x;
554
555                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
556             }
557         }
558         src= halfpel[1][p];
559         for(y=0; y<h; y++){
560             for(x=0; x<w; x++){
561                 int i= y*ls + x;
562
563                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
564             }
565         }
566
567 //FIXME border!
568     }
569 }
570
571 void ff_snow_release_buffer(AVCodecContext *avctx)
572 {
573     SnowContext *s = avctx->priv_data;
574     int i;
575
576     if(s->last_picture[s->max_ref_frames-1].data[0]){
577         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
578         for(i=0; i<9; i++)
579             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
580                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
581     }
582 }
583
584 int ff_snow_frame_start(SnowContext *s){
585    AVFrame tmp;
586    int w= s->avctx->width; //FIXME round up to x16 ?
587    int h= s->avctx->height;
588
589     if (s->current_picture.data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
590         s->dsp.draw_edges(s->current_picture.data[0],
591                           s->current_picture.linesize[0], w   , h   ,
592                           EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
593         s->dsp.draw_edges(s->current_picture.data[1],
594                           s->current_picture.linesize[1], w>>1, h>>1,
595                           EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
596         s->dsp.draw_edges(s->current_picture.data[2],
597                           s->current_picture.linesize[2], w>>1, h>>1,
598                           EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
599     }
600
601     ff_snow_release_buffer(s->avctx);
602
603     tmp= s->last_picture[s->max_ref_frames-1];
604     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
605     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
606     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
607         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
608     s->last_picture[0]= s->current_picture;
609     s->current_picture= tmp;
610
611     if(s->keyframe){
612         s->ref_frames= 0;
613     }else{
614         int i;
615         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
616             if(i && s->last_picture[i-1].key_frame)
617                 break;
618         s->ref_frames= i;
619         if(s->ref_frames==0){
620             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
621             return -1;
622         }
623     }
624
625     s->current_picture.reference= 1;
626     if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
627         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
628         return -1;
629     }
630
631     s->current_picture.key_frame= s->keyframe;
632
633     return 0;
634 }
635
636 av_cold void ff_snow_common_end(SnowContext *s)
637 {
638     int plane_index, level, orientation, i;
639
640     av_freep(&s->spatial_dwt_buffer);
641     av_freep(&s->temp_dwt_buffer);
642     av_freep(&s->spatial_idwt_buffer);
643     av_freep(&s->temp_idwt_buffer);
644     av_freep(&s->run_buffer);
645
646     s->m.me.temp= NULL;
647     av_freep(&s->m.me.scratchpad);
648     av_freep(&s->m.me.map);
649     av_freep(&s->m.me.score_map);
650     av_freep(&s->m.obmc_scratchpad);
651
652     av_freep(&s->block);
653     av_freep(&s->scratchbuf);
654     av_freep(&s->emu_edge_buffer);
655
656     for(i=0; i<MAX_REF_FRAMES; i++){
657         av_freep(&s->ref_mvs[i]);
658         av_freep(&s->ref_scores[i]);
659         if(s->last_picture[i].data[0])
660             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
661     }
662
663     for(plane_index=0; plane_index<3; plane_index++){
664         for(level=s->spatial_decomposition_count-1; level>=0; level--){
665             for(orientation=level ? 1 : 0; orientation<4; orientation++){
666                 SubBand *b= &s->plane[plane_index].band[level][orientation];
667
668                 av_freep(&b->x_coeff);
669             }
670         }
671     }
672     if (s->mconly_picture.data[0])
673         s->avctx->release_buffer(s->avctx, &s->mconly_picture);
674     if (s->current_picture.data[0])
675         s->avctx->release_buffer(s->avctx, &s->current_picture);
676 }