git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
Merge commit 'a5ba798c16d0614d982a76755fdd72b37d437170'
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "dsputil.h"
26 #include "snow_dwt.h"
27 #include "internal.h"
28 #include "snow.h"
29 #include "snowdata.h"
30
31 #include "rangecoder.h"
32 #include "mathops.h"
33 #include "h263.h"
34
35
36 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
37                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
38     int y, x;
39     IDWTELEM * dst;
40     for(y=0; y<b_h; y++){
41         //FIXME ugly misuse of obmc_stride
42         const uint8_t *obmc1= obmc + y*obmc_stride;
43         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
44         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
45         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
46         dst = slice_buffer_get_line(sb, src_y + y);
47         for(x=0; x<b_w; x++){
48             int v=   obmc1[x] * block[3][x + y*src_stride]
49                     +obmc2[x] * block[2][x + y*src_stride]
50                     +obmc3[x] * block[1][x + y*src_stride]
51                     +obmc4[x] * block[0][x + y*src_stride];
52
53             v <<= 8 - LOG2_OBMC_MAX;
54             if(FRAC_BITS != 8){
55                 v >>= 8 - FRAC_BITS;
56             }
57             if(add){
58                 v += dst[x + src_x];
59                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
60                 if(v&(~255)) v= ~(v>>31);
61                 dst8[x + y*src_stride] = v;
62             }else{
63                 dst[x + src_x] -= v;
64             }
65         }
66     }
67 }
68
69 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
70     int plane_index, level, orientation;
71
72     for(plane_index=0; plane_index<3; plane_index++){
73         for(level=0; level<MAX_DECOMPOSITIONS; level++){
74             for(orientation=level ? 1:0; orientation<4; orientation++){
75                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
76             }
77         }
78     }
79     memset(s->header_state, MID_STATE, sizeof(s->header_state));
80     memset(s->block_state, MID_STATE, sizeof(s->block_state));
81 }
82
83 int ff_snow_alloc_blocks(SnowContext *s){
84     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
85     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
86
87     s->b_width = w;
88     s->b_height= h;
89
90     av_free(s->block);
91     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
92     return 0;
93 }
94
95 static void init_qexp(void){
96     int i;
97     double v=128;
98
99     for(i=0; i<QROOT; i++){
100         ff_qexp[i]= lrintf(v);
101         v *= pow(2, 1.0 / QROOT);
102     }
103 }
104 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
105     static const uint8_t weight[64]={
106     8,7,6,5,4,3,2,1,
107     7,7,0,0,0,0,0,1,
108     6,0,6,0,0,0,2,0,
109     5,0,0,5,0,3,0,0,
110     4,0,0,0,4,0,0,0,
111     3,0,0,5,0,3,0,0,
112     2,0,6,0,0,0,2,0,
113     1,7,0,0,0,0,0,1,
114     };
115
116     static const uint8_t brane[256]={
117     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
118     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
119     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
120     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
121     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
122     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
123     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
124     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
125     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
126     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
127     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
128     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
129     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
130     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
131     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
132     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
133     };
134
135     static const uint8_t needs[16]={
136     0,1,0,0,
137     2,4,2,0,
138     0,1,0,0,
139     15
140     };
141
142     int x, y, b, r, l;
143     int16_t tmpIt   [64*(32+HTAPS_MAX)];
144     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
145     int16_t *tmpI= tmpIt;
146     uint8_t *tmp2= tmp2t[0];
147     const uint8_t *hpel[11];
148     av_assert2(dx<16 && dy<16);
149     r= brane[dx + 16*dy]&15;
150     l= brane[dx + 16*dy]>>4;
151
152     b= needs[l] | needs[r];
153     if(p && !p->diag_mc)
154         b= 15;
155
156     if(b&5){
157         for(y=0; y < b_h+HTAPS_MAX-1; y++){
158             for(x=0; x < b_w; x++){
159                 int a_1=src[x + HTAPS_MAX/2-4];
160                 int a0= src[x + HTAPS_MAX/2-3];
161                 int a1= src[x + HTAPS_MAX/2-2];
162                 int a2= src[x + HTAPS_MAX/2-1];
163                 int a3= src[x + HTAPS_MAX/2+0];
164                 int a4= src[x + HTAPS_MAX/2+1];
165                 int a5= src[x + HTAPS_MAX/2+2];
166                 int a6= src[x + HTAPS_MAX/2+3];
167                 int am=0;
168                 if(!p || p->fast_mc){
169                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
170                     tmpI[x]= am;
171                     am= (am+16)>>5;
172                 }else{
173                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
174                     tmpI[x]= am;
175                     am= (am+32)>>6;
176                 }
177
178                 if(am&(~255)) am= ~(am>>31);
179                 tmp2[x]= am;
180             }
181             tmpI+= 64;
182             tmp2+= 64;
183             src += stride;
184         }
185         src -= stride*y;
186     }
187     src += HTAPS_MAX/2 - 1;
188     tmp2= tmp2t[1];
189
190     if(b&2){
191         for(y=0; y < b_h; y++){
192             for(x=0; x < b_w+1; x++){
193                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
194                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
195                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
196                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
197                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
198                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
199                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
200                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
201                 int am=0;
202                 if(!p || p->fast_mc)
203                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
204                 else
205                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
206
207                 if(am&(~255)) am= ~(am>>31);
208                 tmp2[x]= am;
209             }
210             src += stride;
211             tmp2+= 64;
212         }
213         src -= stride*y;
214     }
215     src += stride*(HTAPS_MAX/2 - 1);
216     tmp2= tmp2t[2];
217     tmpI= tmpIt;
218     if(b&4){
219         for(y=0; y < b_h; y++){
220             for(x=0; x < b_w; x++){
221                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
222                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
223                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
224                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
225                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
226                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
227                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
228                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
229                 int am=0;
230                 if(!p || p->fast_mc)
231                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
232                 else
233                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
234                 if(am&(~255)) am= ~(am>>31);
235                 tmp2[x]= am;
236             }
237             tmpI+= 64;
238             tmp2+= 64;
239         }
240     }
241
242     hpel[ 0]= src;
243     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
244     hpel[ 2]= src + 1;
245
246     hpel[ 4]= tmp2t[1];
247     hpel[ 5]= tmp2t[2];
248     hpel[ 6]= tmp2t[1] + 1;
249
250     hpel[ 8]= src + stride;
251     hpel[ 9]= hpel[1] + 64;
252     hpel[10]= hpel[8] + 1;
253
254 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
255
256     if(b==15){
257         int dxy = dx / 8 + dy / 8 * 4;
258         const uint8_t *src1 = hpel[dxy    ];
259         const uint8_t *src2 = hpel[dxy + 1];
260         const uint8_t *src3 = hpel[dxy + 4];
261         const uint8_t *src4 = hpel[dxy + 5];
262         int stride1 = MC_STRIDE(dxy);
263         int stride2 = MC_STRIDE(dxy + 1);
264         int stride3 = MC_STRIDE(dxy + 4);
265         int stride4 = MC_STRIDE(dxy + 5);
266         dx&=7;
267         dy&=7;
268         for(y=0; y < b_h; y++){
269             for(x=0; x < b_w; x++){
270                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
271                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
272             }
273             src1+=stride1;
274             src2+=stride2;
275             src3+=stride3;
276             src4+=stride4;
277             dst +=stride;
278         }
279     }else{
280         const uint8_t *src1= hpel[l];
281         const uint8_t *src2= hpel[r];
282         int stride1 = MC_STRIDE(l);
283         int stride2 = MC_STRIDE(r);
284         int a= weight[((dx&7) + (8*(dy&7)))];
285         int b= 8-a;
286         for(y=0; y < b_h; y++){
287             for(x=0; x < b_w; x++){
288                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
289             }
290             src1+=stride1;
291             src2+=stride2;
292             dst +=stride;
293         }
294     }
295 }
296
297 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
298     if(block->type & BLOCK_INTRA){
299         int x, y;
300         const unsigned color  = block->color[plane_index];
301         const unsigned color4 = color*0x01010101;
302         if(b_w==32){
303             for(y=0; y < b_h; y++){
304                 *(uint32_t*)&dst[0 + y*stride]= color4;
305                 *(uint32_t*)&dst[4 + y*stride]= color4;
306                 *(uint32_t*)&dst[8 + y*stride]= color4;
307                 *(uint32_t*)&dst[12+ y*stride]= color4;
308                 *(uint32_t*)&dst[16+ y*stride]= color4;
309                 *(uint32_t*)&dst[20+ y*stride]= color4;
310                 *(uint32_t*)&dst[24+ y*stride]= color4;
311                 *(uint32_t*)&dst[28+ y*stride]= color4;
312             }
313         }else if(b_w==16){
314             for(y=0; y < b_h; y++){
315                 *(uint32_t*)&dst[0 + y*stride]= color4;
316                 *(uint32_t*)&dst[4 + y*stride]= color4;
317                 *(uint32_t*)&dst[8 + y*stride]= color4;
318                 *(uint32_t*)&dst[12+ y*stride]= color4;
319             }
320         }else if(b_w==8){
321             for(y=0; y < b_h; y++){
322                 *(uint32_t*)&dst[0 + y*stride]= color4;
323                 *(uint32_t*)&dst[4 + y*stride]= color4;
324             }
325         }else if(b_w==4){
326             for(y=0; y < b_h; y++){
327                 *(uint32_t*)&dst[0 + y*stride]= color4;
328             }
329         }else{
330             for(y=0; y < b_h; y++){
331                 for(x=0; x < b_w; x++){
332                     dst[x + y*stride]= color;
333                 }
334             }
335         }
336     }else{
337         uint8_t *src= s->last_picture[block->ref].data[plane_index];
338         const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
339         int mx= block->mx*scale;
340         int my= block->my*scale;
341         const int dx= mx&15;
342         const int dy= my&15;
343         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
344         sx += (mx>>4) - (HTAPS_MAX/2-1);
345         sy += (my>>4) - (HTAPS_MAX/2-1);
346         src += sx + sy*stride;
347         if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
348            || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
349             s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
350             src= tmp + MB_SIZE;
351         }
352
353         av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
354
355 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
356 //        assert(!(b_w&(b_w-1)));
357         av_assert2(b_w>1 && b_h>1);
358         av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
359         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
360             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
361         else if(b_w==32){
362             int y;
363             for(y=0; y<b_h; y+=16){
364                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
365                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
366             }
367         }else if(b_w==b_h)
368             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
369         else if(b_w==2*b_h){
370             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
371             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
372         }else{
373             av_assert2(2*b_w==b_h);
374             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
375             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
376         }
377     }
378 }
379
380 #define mca(dx,dy,b_w)\
381 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
382     av_assert2(h==b_w);\
383     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
384 }
385
386 mca( 0, 0,16)
387 mca( 8, 0,16)
388 mca( 0, 8,16)
389 mca( 8, 8,16)
390 mca( 0, 0,8)
391 mca( 8, 0,8)
392 mca( 0, 8,8)
393 mca( 8, 8,8)
394
395 av_cold int ff_snow_common_init(AVCodecContext *avctx){
396     SnowContext *s = avctx->priv_data;
397     int width, height;
398     int i, j;
399
400     s->avctx= avctx;
401     s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
402
403     ff_dsputil_init(&s->dsp, avctx);
404     ff_videodsp_init(&s->vdsp, 8);
405     ff_dwt_init(&s->dwt);
406     ff_h264qpel_init(&s->h264qpel, 8);
407
408 #define mcf(dx,dy)\
409     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
410     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
411         s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
412     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
413     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
414         s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
415
416     mcf( 0, 0)
417     mcf( 4, 0)
418     mcf( 8, 0)
419     mcf(12, 0)
420     mcf( 0, 4)
421     mcf( 4, 4)
422     mcf( 8, 4)
423     mcf(12, 4)
424     mcf( 0, 8)
425     mcf( 4, 8)
426     mcf( 8, 8)
427     mcf(12, 8)
428     mcf( 0,12)
429     mcf( 4,12)
430     mcf( 8,12)
431     mcf(12,12)
432
433 #define mcfh(dx,dy)\
434     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
435     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
436         mc_block_hpel ## dx ## dy ## 16;\
437     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
438     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
439         mc_block_hpel ## dx ## dy ## 8;
440
441     mcfh(0, 0)
442     mcfh(8, 0)
443     mcfh(0, 8)
444     mcfh(8, 8)
445
446     init_qexp();
447
448 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
449
450     width= s->avctx->width;
451     height= s->avctx->height;
452
453     FF_ALLOCZ_OR_GOTO(avctx, s->spatial_idwt_buffer, width * height * sizeof(IDWTELEM), fail);
454     FF_ALLOCZ_OR_GOTO(avctx, s->spatial_dwt_buffer,  width * height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
455     FF_ALLOCZ_OR_GOTO(avctx, s->temp_dwt_buffer,     width * sizeof(DWTELEM),  fail);
456     FF_ALLOCZ_OR_GOTO(avctx, s->temp_idwt_buffer,    width * sizeof(IDWTELEM), fail);
457     FF_ALLOC_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1) * ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
458
459     for(i=0; i<MAX_REF_FRAMES; i++)
460         for(j=0; j<MAX_REF_FRAMES; j++)
461             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
462
463     return 0;
464 fail:
465     return AVERROR(ENOMEM);
466 }
467
468 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
469     SnowContext *s = avctx->priv_data;
470     int plane_index, level, orientation;
471     int ret, emu_buf_size;
472
473     if(!s->scratchbuf) {
474         if ((ret = ff_get_buffer(s->avctx, &s->mconly_picture)) < 0) {
475             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
476             return ret;
477         }
478         FF_ALLOCZ_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture.linesize[0], 2*avctx->width+256)*7*MB_SIZE, fail);
479         emu_buf_size = FFMAX(s->mconly_picture.linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
480         FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
481     }
482
483     if(s->mconly_picture.format != avctx->pix_fmt) {
484         av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
485         return AVERROR_INVALIDDATA;
486     }
487
488     for(plane_index=0; plane_index<3; plane_index++){
489         int w= s->avctx->width;
490         int h= s->avctx->height;
491
492         if(plane_index){
493             w>>= s->chroma_h_shift;
494             h>>= s->chroma_v_shift;
495         }
496         s->plane[plane_index].width = w;
497         s->plane[plane_index].height= h;
498
499         for(level=s->spatial_decomposition_count-1; level>=0; level--){
500             for(orientation=level ? 1 : 0; orientation<4; orientation++){
501                 SubBand *b= &s->plane[plane_index].band[level][orientation];
502
503                 b->buf= s->spatial_dwt_buffer;
504                 b->level= level;
505                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
506                 b->width = (w + !(orientation&1))>>1;
507                 b->height= (h + !(orientation>1))>>1;
508
509                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
510                 b->buf_x_offset = 0;
511                 b->buf_y_offset = 0;
512
513                 if(orientation&1){
514                     b->buf += (w+1)>>1;
515                     b->buf_x_offset = (w+1)>>1;
516                 }
517                 if(orientation>1){
518                     b->buf += b->stride>>1;
519                     b->buf_y_offset = b->stride_line >> 1;
520                 }
521                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
522
523                 if(level)
524                     b->parent= &s->plane[plane_index].band[level-1][orientation];
525                 //FIXME avoid this realloc
526                 av_freep(&b->x_coeff);
527                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
528             }
529             w= (w+1)>>1;
530             h= (h+1)>>1;
531         }
532     }
533
534     return 0;
535 fail:
536     return AVERROR(ENOMEM);
537 }
538
539 #define USE_HALFPEL_PLANE 0
540
541 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
542     int p,x,y;
543
544     for(p=0; p<3; p++){
545         int is_chroma= !!p;
546         int w= is_chroma ? s->avctx->width >>s->chroma_h_shift : s->avctx->width;
547         int h= is_chroma ? s->avctx->height>>s->chroma_v_shift : s->avctx->height;
548         int ls= frame->linesize[p];
549         uint8_t *src= frame->data[p];
550
551         halfpel[1][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
552         halfpel[2][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
553         halfpel[3][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
554
555         halfpel[0][p]= src;
556         for(y=0; y<h; y++){
557             for(x=0; x<w; x++){
558                 int i= y*ls + x;
559
560                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
561             }
562         }
563         for(y=0; y<h; y++){
564             for(x=0; x<w; x++){
565                 int i= y*ls + x;
566
567                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
568             }
569         }
570         src= halfpel[1][p];
571         for(y=0; y<h; y++){
572             for(x=0; x<w; x++){
573                 int i= y*ls + x;
574
575                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
576             }
577         }
578
579 //FIXME border!
580     }
581 }
582
583 void ff_snow_release_buffer(AVCodecContext *avctx)
584 {
585     SnowContext *s = avctx->priv_data;
586     int i;
587
588     if(s->last_picture[s->max_ref_frames-1].data[0]){
589         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
590         for(i=0; i<9; i++)
591             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
592                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
593     }
594 }
595
596 int ff_snow_frame_start(SnowContext *s){
597    AVFrame tmp;
598    int w= s->avctx->width; //FIXME round up to x16 ?
599    int h= s->avctx->height;
600
601     if (s->current_picture.data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
602         s->dsp.draw_edges(s->current_picture.data[0],
603                           s->current_picture.linesize[0], w   , h   ,
604                           EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
605         s->dsp.draw_edges(s->current_picture.data[1],
606                           s->current_picture.linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
607                           EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
608         s->dsp.draw_edges(s->current_picture.data[2],
609                           s->current_picture.linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
610                           EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
611     }
612
613     ff_snow_release_buffer(s->avctx);
614
615     tmp= s->last_picture[s->max_ref_frames-1];
616     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
617     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
618     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
619         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
620     s->last_picture[0]= s->current_picture;
621     s->current_picture= tmp;
622
623     if(s->keyframe){
624         s->ref_frames= 0;
625     }else{
626         int i;
627         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
628             if(i && s->last_picture[i-1].key_frame)
629                 break;
630         s->ref_frames= i;
631         if(s->ref_frames==0){
632             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
633             return -1;
634         }
635     }
636
637     s->current_picture.reference= 3;
638     if(ff_get_buffer(s->avctx, &s->current_picture) < 0){
639         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
640         return -1;
641     }
642
643     s->current_picture.key_frame= s->keyframe;
644
645     return 0;
646 }
647
648 av_cold void ff_snow_common_end(SnowContext *s)
649 {
650     int plane_index, level, orientation, i;
651
652     av_freep(&s->spatial_dwt_buffer);
653     av_freep(&s->temp_dwt_buffer);
654     av_freep(&s->spatial_idwt_buffer);
655     av_freep(&s->temp_idwt_buffer);
656     av_freep(&s->run_buffer);
657
658     s->m.me.temp= NULL;
659     av_freep(&s->m.me.scratchpad);
660     av_freep(&s->m.me.map);
661     av_freep(&s->m.me.score_map);
662     av_freep(&s->m.obmc_scratchpad);
663
664     av_freep(&s->block);
665     av_freep(&s->scratchbuf);
666     av_freep(&s->emu_edge_buffer);
667
668     for(i=0; i<MAX_REF_FRAMES; i++){
669         av_freep(&s->ref_mvs[i]);
670         av_freep(&s->ref_scores[i]);
671         if(s->last_picture[i].data[0]) {
672             av_assert0(s->last_picture[i].data[0] != s->current_picture.data[0]);
673             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
674         }
675     }
676
677     for(plane_index=0; plane_index<3; plane_index++){
678         for(level=s->spatial_decomposition_count-1; level>=0; level--){
679             for(orientation=level ? 1 : 0; orientation<4; orientation++){
680                 SubBand *b= &s->plane[plane_index].band[level][orientation];
681
682                 av_freep(&b->x_coeff);
683             }
684         }
685     }
686     if (s->mconly_picture.data[0])
687         s->avctx->release_buffer(s->avctx, &s->mconly_picture);
688     if (s->current_picture.data[0])
689         s->avctx->release_buffer(s->avctx, &s->current_picture);
690 }