]> git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
svq1: unmacroify macros used only once.
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "dsputil.h"
26 #include "dwt.h"
27 #include "internal.h"
28 #include "snow.h"
29 #include "snowdata.h"
30
31 #include "rangecoder.h"
32 #include "mathops.h"
33 #include "h263.h"
34
35 #undef NDEBUG
36 #include <assert.h>
37
38
39 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
40                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
41     int y, x;
42     IDWTELEM * dst;
43     for(y=0; y<b_h; y++){
44         //FIXME ugly misuse of obmc_stride
45         const uint8_t *obmc1= obmc + y*obmc_stride;
46         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
47         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
48         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
49         dst = slice_buffer_get_line(sb, src_y + y);
50         for(x=0; x<b_w; x++){
51             int v=   obmc1[x] * block[3][x + y*src_stride]
52                     +obmc2[x] * block[2][x + y*src_stride]
53                     +obmc3[x] * block[1][x + y*src_stride]
54                     +obmc4[x] * block[0][x + y*src_stride];
55
56             v <<= 8 - LOG2_OBMC_MAX;
57             if(FRAC_BITS != 8){
58                 v >>= 8 - FRAC_BITS;
59             }
60             if(add){
61                 v += dst[x + src_x];
62                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
63                 if(v&(~255)) v= ~(v>>31);
64                 dst8[x + y*src_stride] = v;
65             }else{
66                 dst[x + src_x] -= v;
67             }
68         }
69     }
70 }
71
72 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
73     int plane_index, level, orientation;
74
75     for(plane_index=0; plane_index<3; plane_index++){
76         for(level=0; level<MAX_DECOMPOSITIONS; level++){
77             for(orientation=level ? 1:0; orientation<4; orientation++){
78                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
79             }
80         }
81     }
82     memset(s->header_state, MID_STATE, sizeof(s->header_state));
83     memset(s->block_state, MID_STATE, sizeof(s->block_state));
84 }
85
86 int ff_snow_alloc_blocks(SnowContext *s){
87     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
88     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
89
90     s->b_width = w;
91     s->b_height= h;
92
93     av_free(s->block);
94     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
95     return 0;
96 }
97
98 static void init_qexp(void){
99     int i;
100     double v=128;
101
102     for(i=0; i<QROOT; i++){
103         ff_qexp[i]= lrintf(v);
104         v *= pow(2, 1.0 / QROOT);
105     }
106 }
107 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
108     static const uint8_t weight[64]={
109     8,7,6,5,4,3,2,1,
110     7,7,0,0,0,0,0,1,
111     6,0,6,0,0,0,2,0,
112     5,0,0,5,0,3,0,0,
113     4,0,0,0,4,0,0,0,
114     3,0,0,5,0,3,0,0,
115     2,0,6,0,0,0,2,0,
116     1,7,0,0,0,0,0,1,
117     };
118
119     static const uint8_t brane[256]={
120     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
121     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
122     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
123     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
124     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
125     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
126     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
127     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
128     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
129     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
130     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
131     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
132     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
133     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
134     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
135     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
136     };
137
138     static const uint8_t needs[16]={
139     0,1,0,0,
140     2,4,2,0,
141     0,1,0,0,
142     15
143     };
144
145     int x, y, b, r, l;
146     int16_t tmpIt   [64*(32+HTAPS_MAX)];
147     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
148     int16_t *tmpI= tmpIt;
149     uint8_t *tmp2= tmp2t[0];
150     const uint8_t *hpel[11];
151     assert(dx<16 && dy<16);
152     r= brane[dx + 16*dy]&15;
153     l= brane[dx + 16*dy]>>4;
154
155     b= needs[l] | needs[r];
156     if(p && !p->diag_mc)
157         b= 15;
158
159     if(b&5){
160         for(y=0; y < b_h+HTAPS_MAX-1; y++){
161             for(x=0; x < b_w; x++){
162                 int a_1=src[x + HTAPS_MAX/2-4];
163                 int a0= src[x + HTAPS_MAX/2-3];
164                 int a1= src[x + HTAPS_MAX/2-2];
165                 int a2= src[x + HTAPS_MAX/2-1];
166                 int a3= src[x + HTAPS_MAX/2+0];
167                 int a4= src[x + HTAPS_MAX/2+1];
168                 int a5= src[x + HTAPS_MAX/2+2];
169                 int a6= src[x + HTAPS_MAX/2+3];
170                 int am=0;
171                 if(!p || p->fast_mc){
172                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
173                     tmpI[x]= am;
174                     am= (am+16)>>5;
175                 }else{
176                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
177                     tmpI[x]= am;
178                     am= (am+32)>>6;
179                 }
180
181                 if(am&(~255)) am= ~(am>>31);
182                 tmp2[x]= am;
183             }
184             tmpI+= 64;
185             tmp2+= 64;
186             src += stride;
187         }
188         src -= stride*y;
189     }
190     src += HTAPS_MAX/2 - 1;
191     tmp2= tmp2t[1];
192
193     if(b&2){
194         for(y=0; y < b_h; y++){
195             for(x=0; x < b_w+1; x++){
196                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
197                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
198                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
199                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
200                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
201                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
202                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
203                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
204                 int am=0;
205                 if(!p || p->fast_mc)
206                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
207                 else
208                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
209
210                 if(am&(~255)) am= ~(am>>31);
211                 tmp2[x]= am;
212             }
213             src += stride;
214             tmp2+= 64;
215         }
216         src -= stride*y;
217     }
218     src += stride*(HTAPS_MAX/2 - 1);
219     tmp2= tmp2t[2];
220     tmpI= tmpIt;
221     if(b&4){
222         for(y=0; y < b_h; y++){
223             for(x=0; x < b_w; x++){
224                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
225                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
226                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
227                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
228                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
229                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
230                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
231                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
232                 int am=0;
233                 if(!p || p->fast_mc)
234                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
235                 else
236                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
237                 if(am&(~255)) am= ~(am>>31);
238                 tmp2[x]= am;
239             }
240             tmpI+= 64;
241             tmp2+= 64;
242         }
243     }
244
245     hpel[ 0]= src;
246     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
247     hpel[ 2]= src + 1;
248
249     hpel[ 4]= tmp2t[1];
250     hpel[ 5]= tmp2t[2];
251     hpel[ 6]= tmp2t[1] + 1;
252
253     hpel[ 8]= src + stride;
254     hpel[ 9]= hpel[1] + 64;
255     hpel[10]= hpel[8] + 1;
256
257 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
258
259     if(b==15){
260         int dxy = dx / 8 + dy / 8 * 4;
261         const uint8_t *src1 = hpel[dxy    ];
262         const uint8_t *src2 = hpel[dxy + 1];
263         const uint8_t *src3 = hpel[dxy + 4];
264         const uint8_t *src4 = hpel[dxy + 5];
265         int stride1 = MC_STRIDE(dxy);
266         int stride2 = MC_STRIDE(dxy + 1);
267         int stride3 = MC_STRIDE(dxy + 4);
268         int stride4 = MC_STRIDE(dxy + 5);
269         dx&=7;
270         dy&=7;
271         for(y=0; y < b_h; y++){
272             for(x=0; x < b_w; x++){
273                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
274                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
275             }
276             src1+=stride1;
277             src2+=stride2;
278             src3+=stride3;
279             src4+=stride4;
280             dst +=stride;
281         }
282     }else{
283         const uint8_t *src1= hpel[l];
284         const uint8_t *src2= hpel[r];
285         int stride1 = MC_STRIDE(l);
286         int stride2 = MC_STRIDE(r);
287         int a= weight[((dx&7) + (8*(dy&7)))];
288         int b= 8-a;
289         for(y=0; y < b_h; y++){
290             for(x=0; x < b_w; x++){
291                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
292             }
293             src1+=stride1;
294             src2+=stride2;
295             dst +=stride;
296         }
297     }
298 }
299
300 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
301     if(block->type & BLOCK_INTRA){
302         int x, y;
303         const unsigned color  = block->color[plane_index];
304         const unsigned color4 = color*0x01010101;
305         if(b_w==32){
306             for(y=0; y < b_h; y++){
307                 *(uint32_t*)&dst[0 + y*stride]= color4;
308                 *(uint32_t*)&dst[4 + y*stride]= color4;
309                 *(uint32_t*)&dst[8 + y*stride]= color4;
310                 *(uint32_t*)&dst[12+ y*stride]= color4;
311                 *(uint32_t*)&dst[16+ y*stride]= color4;
312                 *(uint32_t*)&dst[20+ y*stride]= color4;
313                 *(uint32_t*)&dst[24+ y*stride]= color4;
314                 *(uint32_t*)&dst[28+ y*stride]= color4;
315             }
316         }else if(b_w==16){
317             for(y=0; y < b_h; y++){
318                 *(uint32_t*)&dst[0 + y*stride]= color4;
319                 *(uint32_t*)&dst[4 + y*stride]= color4;
320                 *(uint32_t*)&dst[8 + y*stride]= color4;
321                 *(uint32_t*)&dst[12+ y*stride]= color4;
322             }
323         }else if(b_w==8){
324             for(y=0; y < b_h; y++){
325                 *(uint32_t*)&dst[0 + y*stride]= color4;
326                 *(uint32_t*)&dst[4 + y*stride]= color4;
327             }
328         }else if(b_w==4){
329             for(y=0; y < b_h; y++){
330                 *(uint32_t*)&dst[0 + y*stride]= color4;
331             }
332         }else{
333             for(y=0; y < b_h; y++){
334                 for(x=0; x < b_w; x++){
335                     dst[x + y*stride]= color;
336                 }
337             }
338         }
339     }else{
340         uint8_t *src= s->last_picture[block->ref].data[plane_index];
341         const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
342         int mx= block->mx*scale;
343         int my= block->my*scale;
344         const int dx= mx&15;
345         const int dy= my&15;
346         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
347         sx += (mx>>4) - (HTAPS_MAX/2-1);
348         sy += (my>>4) - (HTAPS_MAX/2-1);
349         src += sx + sy*stride;
350         if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
351            || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
352             s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
353             src= tmp + MB_SIZE;
354         }
355 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
356 //        assert(!(b_w&(b_w-1)));
357         assert(b_w>1 && b_h>1);
358         assert((tab_index>=0 && tab_index<4) || b_w==32);
359         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
360             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
361         else if(b_w==32){
362             int y;
363             for(y=0; y<b_h; y+=16){
364                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
365                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
366             }
367         }else if(b_w==b_h)
368             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
369         else if(b_w==2*b_h){
370             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
371             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
372         }else{
373             assert(2*b_w==b_h);
374             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
375             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
376         }
377     }
378 }
379
380 #define mca(dx,dy,b_w)\
381 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
382     assert(h==b_w);\
383     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
384 }
385
386 mca( 0, 0,16)
387 mca( 8, 0,16)
388 mca( 0, 8,16)
389 mca( 8, 8,16)
390 mca( 0, 0,8)
391 mca( 8, 0,8)
392 mca( 0, 8,8)
393 mca( 8, 8,8)
394
395 av_cold int ff_snow_common_init(AVCodecContext *avctx){
396     SnowContext *s = avctx->priv_data;
397     int width, height;
398     int i, j, ret;
399     int emu_buf_size;
400
401     s->avctx= avctx;
402     s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
403
404     ff_dsputil_init(&s->dsp, avctx);
405     ff_dwt_init(&s->dwt);
406
407 #define mcf(dx,dy)\
408     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
409     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
410         s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
411     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
412     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
413         s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
414
415     mcf( 0, 0)
416     mcf( 4, 0)
417     mcf( 8, 0)
418     mcf(12, 0)
419     mcf( 0, 4)
420     mcf( 4, 4)
421     mcf( 8, 4)
422     mcf(12, 4)
423     mcf( 0, 8)
424     mcf( 4, 8)
425     mcf( 8, 8)
426     mcf(12, 8)
427     mcf( 0,12)
428     mcf( 4,12)
429     mcf( 8,12)
430     mcf(12,12)
431
432 #define mcfh(dx,dy)\
433     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
434     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
435         mc_block_hpel ## dx ## dy ## 16;\
436     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
437     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
438         mc_block_hpel ## dx ## dy ## 8;
439
440     mcfh(0, 0)
441     mcfh(8, 0)
442     mcfh(0, 8)
443     mcfh(8, 8)
444
445     init_qexp();
446
447 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
448
449     width= s->avctx->width;
450     height= s->avctx->height;
451
452     FF_ALLOCZ_OR_GOTO(avctx, s->spatial_idwt_buffer, width * height * sizeof(IDWTELEM), fail);
453     FF_ALLOCZ_OR_GOTO(avctx, s->spatial_dwt_buffer,  width * height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
454     FF_ALLOCZ_OR_GOTO(avctx, s->temp_dwt_buffer,     width * sizeof(DWTELEM),  fail);
455     FF_ALLOCZ_OR_GOTO(avctx, s->temp_idwt_buffer,    width * sizeof(IDWTELEM), fail);
456     FF_ALLOC_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1) * ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
457
458     for(i=0; i<MAX_REF_FRAMES; i++)
459         for(j=0; j<MAX_REF_FRAMES; j++)
460             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
461
462     if ((ret = ff_get_buffer(s->avctx, &s->mconly_picture)) < 0) {
463         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
464         return ret;
465     }
466     FF_ALLOC_OR_GOTO(avctx, s->scratchbuf, s->mconly_picture.linesize[0]*7*MB_SIZE, fail);
467     emu_buf_size = s->mconly_picture.linesize[0] * (2 * MB_SIZE + HTAPS_MAX - 1);
468     FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
469
470     return 0;
471 fail:
472     return AVERROR(ENOMEM);
473 }
474
475 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
476     SnowContext *s = avctx->priv_data;
477     int plane_index, level, orientation;
478
479     for(plane_index=0; plane_index<3; plane_index++){
480         int w= s->avctx->width;
481         int h= s->avctx->height;
482
483         if(plane_index){
484             w>>= s->chroma_h_shift;
485             h>>= s->chroma_v_shift;
486         }
487         s->plane[plane_index].width = w;
488         s->plane[plane_index].height= h;
489
490         for(level=s->spatial_decomposition_count-1; level>=0; level--){
491             for(orientation=level ? 1 : 0; orientation<4; orientation++){
492                 SubBand *b= &s->plane[plane_index].band[level][orientation];
493
494                 b->buf= s->spatial_dwt_buffer;
495                 b->level= level;
496                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
497                 b->width = (w + !(orientation&1))>>1;
498                 b->height= (h + !(orientation>1))>>1;
499
500                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
501                 b->buf_x_offset = 0;
502                 b->buf_y_offset = 0;
503
504                 if(orientation&1){
505                     b->buf += (w+1)>>1;
506                     b->buf_x_offset = (w+1)>>1;
507                 }
508                 if(orientation>1){
509                     b->buf += b->stride>>1;
510                     b->buf_y_offset = b->stride_line >> 1;
511                 }
512                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
513
514                 if(level)
515                     b->parent= &s->plane[plane_index].band[level-1][orientation];
516                 //FIXME avoid this realloc
517                 av_freep(&b->x_coeff);
518                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
519             }
520             w= (w+1)>>1;
521             h= (h+1)>>1;
522         }
523     }
524
525     return 0;
526 }
527
528 #define USE_HALFPEL_PLANE 0
529
530 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
531     int p,x,y;
532
533     for(p=0; p<3; p++){
534         int is_chroma= !!p;
535         int w= s->avctx->width  >>is_chroma;
536         int h= s->avctx->height >>is_chroma;
537         int ls= frame->linesize[p];
538         uint8_t *src= frame->data[p];
539
540         halfpel[1][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
541         halfpel[2][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
542         halfpel[3][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
543
544         halfpel[0][p]= src;
545         for(y=0; y<h; y++){
546             for(x=0; x<w; x++){
547                 int i= y*ls + x;
548
549                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
550             }
551         }
552         for(y=0; y<h; y++){
553             for(x=0; x<w; x++){
554                 int i= y*ls + x;
555
556                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
557             }
558         }
559         src= halfpel[1][p];
560         for(y=0; y<h; y++){
561             for(x=0; x<w; x++){
562                 int i= y*ls + x;
563
564                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
565             }
566         }
567
568 //FIXME border!
569     }
570 }
571
572 void ff_snow_release_buffer(AVCodecContext *avctx)
573 {
574     SnowContext *s = avctx->priv_data;
575     int i;
576
577     if(s->last_picture[s->max_ref_frames-1].data[0]){
578         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
579         for(i=0; i<9; i++)
580             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
581                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
582     }
583 }
584
585 int ff_snow_frame_start(SnowContext *s){
586    AVFrame tmp;
587    int w= s->avctx->width; //FIXME round up to x16 ?
588    int h= s->avctx->height;
589
590     if (s->current_picture.data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
591         s->dsp.draw_edges(s->current_picture.data[0],
592                           s->current_picture.linesize[0], w   , h   ,
593                           EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
594         s->dsp.draw_edges(s->current_picture.data[1],
595                           s->current_picture.linesize[1], w>>1, h>>1,
596                           EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
597         s->dsp.draw_edges(s->current_picture.data[2],
598                           s->current_picture.linesize[2], w>>1, h>>1,
599                           EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
600     }
601
602     ff_snow_release_buffer(s->avctx);
603
604     tmp= s->last_picture[s->max_ref_frames-1];
605     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
606     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
607     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
608         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
609     s->last_picture[0]= s->current_picture;
610     s->current_picture= tmp;
611
612     if(s->keyframe){
613         s->ref_frames= 0;
614     }else{
615         int i;
616         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
617             if(i && s->last_picture[i-1].key_frame)
618                 break;
619         s->ref_frames= i;
620         if(s->ref_frames==0){
621             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
622             return -1;
623         }
624     }
625
626     s->current_picture.reference= 1;
627     if(ff_get_buffer(s->avctx, &s->current_picture) < 0){
628         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
629         return -1;
630     }
631
632     s->current_picture.key_frame= s->keyframe;
633
634     return 0;
635 }
636
637 av_cold void ff_snow_common_end(SnowContext *s)
638 {
639     int plane_index, level, orientation, i;
640
641     av_freep(&s->spatial_dwt_buffer);
642     av_freep(&s->temp_dwt_buffer);
643     av_freep(&s->spatial_idwt_buffer);
644     av_freep(&s->temp_idwt_buffer);
645     av_freep(&s->run_buffer);
646
647     s->m.me.temp= NULL;
648     av_freep(&s->m.me.scratchpad);
649     av_freep(&s->m.me.map);
650     av_freep(&s->m.me.score_map);
651     av_freep(&s->m.obmc_scratchpad);
652
653     av_freep(&s->block);
654     av_freep(&s->scratchbuf);
655     av_freep(&s->emu_edge_buffer);
656
657     for(i=0; i<MAX_REF_FRAMES; i++){
658         av_freep(&s->ref_mvs[i]);
659         av_freep(&s->ref_scores[i]);
660         if(s->last_picture[i].data[0])
661             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
662     }
663
664     for(plane_index=0; plane_index<3; plane_index++){
665         for(level=s->spatial_decomposition_count-1; level>=0; level--){
666             for(orientation=level ? 1 : 0; orientation<4; orientation++){
667                 SubBand *b= &s->plane[plane_index].band[level][orientation];
668
669                 av_freep(&b->x_coeff);
670             }
671         }
672     }
673     if (s->mconly_picture.data[0])
674         s->avctx->release_buffer(s->avctx, &s->mconly_picture);
675     if (s->current_picture.data[0])
676         s->avctx->release_buffer(s->avctx, &s->current_picture);
677 }