]> git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
huffyuv: change statistics initialization
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "dsputil.h"
26 #include "snow_dwt.h"
27 #include "internal.h"
28 #include "snow.h"
29 #include "snowdata.h"
30
31 #include "rangecoder.h"
32 #include "mathops.h"
33 #include "h263.h"
34
35
36 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
37                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
38     int y, x;
39     IDWTELEM * dst;
40     for(y=0; y<b_h; y++){
41         //FIXME ugly misuse of obmc_stride
42         const uint8_t *obmc1= obmc + y*obmc_stride;
43         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
44         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
45         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
46         dst = slice_buffer_get_line(sb, src_y + y);
47         for(x=0; x<b_w; x++){
48             int v=   obmc1[x] * block[3][x + y*src_stride]
49                     +obmc2[x] * block[2][x + y*src_stride]
50                     +obmc3[x] * block[1][x + y*src_stride]
51                     +obmc4[x] * block[0][x + y*src_stride];
52
53             v <<= 8 - LOG2_OBMC_MAX;
54             if(FRAC_BITS != 8){
55                 v >>= 8 - FRAC_BITS;
56             }
57             if(add){
58                 v += dst[x + src_x];
59                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
60                 if(v&(~255)) v= ~(v>>31);
61                 dst8[x + y*src_stride] = v;
62             }else{
63                 dst[x + src_x] -= v;
64             }
65         }
66     }
67 }
68
69 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
70     int plane_index, level, orientation;
71
72     for(plane_index=0; plane_index<3; plane_index++){
73         for(level=0; level<MAX_DECOMPOSITIONS; level++){
74             for(orientation=level ? 1:0; orientation<4; orientation++){
75                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
76             }
77         }
78     }
79     memset(s->header_state, MID_STATE, sizeof(s->header_state));
80     memset(s->block_state, MID_STATE, sizeof(s->block_state));
81 }
82
83 int ff_snow_alloc_blocks(SnowContext *s){
84     int w= FF_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
85     int h= FF_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
86
87     s->b_width = w;
88     s->b_height= h;
89
90     av_free(s->block);
91     s->block= av_mallocz_array(w * h,  sizeof(BlockNode) << (s->block_max_depth*2));
92     if (!s->block)
93         return AVERROR(ENOMEM);
94
95     return 0;
96 }
97
98 static av_cold void init_qexp(void){
99     int i;
100     double v=128;
101
102     for(i=0; i<QROOT; i++){
103         ff_qexp[i]= lrintf(v);
104         v *= pow(2, 1.0 / QROOT);
105     }
106 }
107 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
108     static const uint8_t weight[64]={
109     8,7,6,5,4,3,2,1,
110     7,7,0,0,0,0,0,1,
111     6,0,6,0,0,0,2,0,
112     5,0,0,5,0,3,0,0,
113     4,0,0,0,4,0,0,0,
114     3,0,0,5,0,3,0,0,
115     2,0,6,0,0,0,2,0,
116     1,7,0,0,0,0,0,1,
117     };
118
119     static const uint8_t brane[256]={
120     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
121     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
122     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
123     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
124     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
125     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
126     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
127     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
128     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
129     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
130     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
131     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
132     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
133     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
134     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
135     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
136     };
137
138     static const uint8_t needs[16]={
139     0,1,0,0,
140     2,4,2,0,
141     0,1,0,0,
142     15
143     };
144
145     int x, y, b, r, l;
146     int16_t tmpIt   [64*(32+HTAPS_MAX)];
147     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
148     int16_t *tmpI= tmpIt;
149     uint8_t *tmp2= tmp2t[0];
150     const uint8_t *hpel[11];
151     av_assert2(dx<16 && dy<16);
152     r= brane[dx + 16*dy]&15;
153     l= brane[dx + 16*dy]>>4;
154
155     b= needs[l] | needs[r];
156     if(p && !p->diag_mc)
157         b= 15;
158
159     if(b&5){
160         for(y=0; y < b_h+HTAPS_MAX-1; y++){
161             for(x=0; x < b_w; x++){
162                 int a_1=src[x + HTAPS_MAX/2-4];
163                 int a0= src[x + HTAPS_MAX/2-3];
164                 int a1= src[x + HTAPS_MAX/2-2];
165                 int a2= src[x + HTAPS_MAX/2-1];
166                 int a3= src[x + HTAPS_MAX/2+0];
167                 int a4= src[x + HTAPS_MAX/2+1];
168                 int a5= src[x + HTAPS_MAX/2+2];
169                 int a6= src[x + HTAPS_MAX/2+3];
170                 int am=0;
171                 if(!p || p->fast_mc){
172                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
173                     tmpI[x]= am;
174                     am= (am+16)>>5;
175                 }else{
176                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
177                     tmpI[x]= am;
178                     am= (am+32)>>6;
179                 }
180
181                 if(am&(~255)) am= ~(am>>31);
182                 tmp2[x]= am;
183             }
184             tmpI+= 64;
185             tmp2+= 64;
186             src += stride;
187         }
188         src -= stride*y;
189     }
190     src += HTAPS_MAX/2 - 1;
191     tmp2= tmp2t[1];
192
193     if(b&2){
194         for(y=0; y < b_h; y++){
195             for(x=0; x < b_w+1; x++){
196                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
197                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
198                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
199                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
200                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
201                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
202                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
203                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
204                 int am=0;
205                 if(!p || p->fast_mc)
206                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
207                 else
208                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
209
210                 if(am&(~255)) am= ~(am>>31);
211                 tmp2[x]= am;
212             }
213             src += stride;
214             tmp2+= 64;
215         }
216         src -= stride*y;
217     }
218     src += stride*(HTAPS_MAX/2 - 1);
219     tmp2= tmp2t[2];
220     tmpI= tmpIt;
221     if(b&4){
222         for(y=0; y < b_h; y++){
223             for(x=0; x < b_w; x++){
224                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
225                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
226                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
227                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
228                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
229                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
230                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
231                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
232                 int am=0;
233                 if(!p || p->fast_mc)
234                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
235                 else
236                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
237                 if(am&(~255)) am= ~(am>>31);
238                 tmp2[x]= am;
239             }
240             tmpI+= 64;
241             tmp2+= 64;
242         }
243     }
244
245     hpel[ 0]= src;
246     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
247     hpel[ 2]= src + 1;
248
249     hpel[ 4]= tmp2t[1];
250     hpel[ 5]= tmp2t[2];
251     hpel[ 6]= tmp2t[1] + 1;
252
253     hpel[ 8]= src + stride;
254     hpel[ 9]= hpel[1] + 64;
255     hpel[10]= hpel[8] + 1;
256
257 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
258
259     if(b==15){
260         int dxy = dx / 8 + dy / 8 * 4;
261         const uint8_t *src1 = hpel[dxy    ];
262         const uint8_t *src2 = hpel[dxy + 1];
263         const uint8_t *src3 = hpel[dxy + 4];
264         const uint8_t *src4 = hpel[dxy + 5];
265         int stride1 = MC_STRIDE(dxy);
266         int stride2 = MC_STRIDE(dxy + 1);
267         int stride3 = MC_STRIDE(dxy + 4);
268         int stride4 = MC_STRIDE(dxy + 5);
269         dx&=7;
270         dy&=7;
271         for(y=0; y < b_h; y++){
272             for(x=0; x < b_w; x++){
273                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
274                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
275             }
276             src1+=stride1;
277             src2+=stride2;
278             src3+=stride3;
279             src4+=stride4;
280             dst +=stride;
281         }
282     }else{
283         const uint8_t *src1= hpel[l];
284         const uint8_t *src2= hpel[r];
285         int stride1 = MC_STRIDE(l);
286         int stride2 = MC_STRIDE(r);
287         int a= weight[((dx&7) + (8*(dy&7)))];
288         int b= 8-a;
289         for(y=0; y < b_h; y++){
290             for(x=0; x < b_w; x++){
291                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
292             }
293             src1+=stride1;
294             src2+=stride2;
295             dst +=stride;
296         }
297     }
298 }
299
300 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
301     if(block->type & BLOCK_INTRA){
302         int x, y;
303         const unsigned color  = block->color[plane_index];
304         const unsigned color4 = color*0x01010101;
305         if(b_w==32){
306             for(y=0; y < b_h; y++){
307                 *(uint32_t*)&dst[0 + y*stride]= color4;
308                 *(uint32_t*)&dst[4 + y*stride]= color4;
309                 *(uint32_t*)&dst[8 + y*stride]= color4;
310                 *(uint32_t*)&dst[12+ y*stride]= color4;
311                 *(uint32_t*)&dst[16+ y*stride]= color4;
312                 *(uint32_t*)&dst[20+ y*stride]= color4;
313                 *(uint32_t*)&dst[24+ y*stride]= color4;
314                 *(uint32_t*)&dst[28+ y*stride]= color4;
315             }
316         }else if(b_w==16){
317             for(y=0; y < b_h; y++){
318                 *(uint32_t*)&dst[0 + y*stride]= color4;
319                 *(uint32_t*)&dst[4 + y*stride]= color4;
320                 *(uint32_t*)&dst[8 + y*stride]= color4;
321                 *(uint32_t*)&dst[12+ y*stride]= color4;
322             }
323         }else if(b_w==8){
324             for(y=0; y < b_h; y++){
325                 *(uint32_t*)&dst[0 + y*stride]= color4;
326                 *(uint32_t*)&dst[4 + y*stride]= color4;
327             }
328         }else if(b_w==4){
329             for(y=0; y < b_h; y++){
330                 *(uint32_t*)&dst[0 + y*stride]= color4;
331             }
332         }else{
333             for(y=0; y < b_h; y++){
334                 for(x=0; x < b_w; x++){
335                     dst[x + y*stride]= color;
336                 }
337             }
338         }
339     }else{
340         uint8_t *src= s->last_picture[block->ref]->data[plane_index];
341         const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
342         int mx= block->mx*scale;
343         int my= block->my*scale;
344         const int dx= mx&15;
345         const int dy= my&15;
346         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
347         sx += (mx>>4) - (HTAPS_MAX/2-1);
348         sy += (my>>4) - (HTAPS_MAX/2-1);
349         src += sx + sy*stride;
350         if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
351            || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
352             s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
353                                      stride, stride,
354                                      b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
355                                      sx, sy, w, h);
356             src= tmp + MB_SIZE;
357         }
358
359         av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
360
361         av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
362         if(    (dx&3) || (dy&3)
363             || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
364             || (b_w&(b_w-1))
365             || b_w == 1
366             || b_h == 1
367             || !s->plane[plane_index].fast_mc )
368             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
369         else if(b_w==32){
370             int y;
371             for(y=0; y<b_h; y+=16){
372                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
373                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
374             }
375         }else if(b_w==b_h)
376             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
377         else if(b_w==2*b_h){
378             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
379             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
380         }else{
381             av_assert2(2*b_w==b_h);
382             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
383             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
384         }
385     }
386 }
387
388 #define mca(dx,dy,b_w)\
389 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
390     av_assert2(h==b_w);\
391     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
392 }
393
394 mca( 0, 0,16)
395 mca( 8, 0,16)
396 mca( 0, 8,16)
397 mca( 8, 8,16)
398 mca( 0, 0,8)
399 mca( 8, 0,8)
400 mca( 0, 8,8)
401 mca( 8, 8,8)
402
403 av_cold int ff_snow_common_init(AVCodecContext *avctx){
404     SnowContext *s = avctx->priv_data;
405     int width, height;
406     int i, j;
407
408     s->avctx= avctx;
409     s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
410
411     ff_dsputil_init(&s->dsp, avctx);
412     ff_hpeldsp_init(&s->hdsp, avctx->flags);
413     ff_videodsp_init(&s->vdsp, 8);
414     ff_dwt_init(&s->dwt);
415     ff_h264qpel_init(&s->h264qpel, 8);
416
417 #define mcf(dx,dy)\
418     s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
419     s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
420         s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
421     s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
422     s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
423         s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
424
425     mcf( 0, 0)
426     mcf( 4, 0)
427     mcf( 8, 0)
428     mcf(12, 0)
429     mcf( 0, 4)
430     mcf( 4, 4)
431     mcf( 8, 4)
432     mcf(12, 4)
433     mcf( 0, 8)
434     mcf( 4, 8)
435     mcf( 8, 8)
436     mcf(12, 8)
437     mcf( 0,12)
438     mcf( 4,12)
439     mcf( 8,12)
440     mcf(12,12)
441
442 #define mcfh(dx,dy)\
443     s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
444     s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
445         mc_block_hpel ## dx ## dy ## 16;\
446     s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
447     s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
448         mc_block_hpel ## dx ## dy ## 8;
449
450     mcfh(0, 0)
451     mcfh(8, 0)
452     mcfh(0, 8)
453     mcfh(8, 8)
454
455     init_qexp();
456
457 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
458
459     width= s->avctx->width;
460     height= s->avctx->height;
461
462     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_idwt_buffer, width, height * sizeof(IDWTELEM), fail);
463     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_dwt_buffer,  width, height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
464     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_dwt_buffer,     width, sizeof(DWTELEM),  fail);
465     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_idwt_buffer,    width, sizeof(IDWTELEM), fail);
466     FF_ALLOC_ARRAY_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1), ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
467
468     for(i=0; i<MAX_REF_FRAMES; i++) {
469         for(j=0; j<MAX_REF_FRAMES; j++)
470             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
471         s->last_picture[i] = av_frame_alloc();
472         if (!s->last_picture[i])
473             goto fail;
474     }
475
476     s->mconly_picture = av_frame_alloc();
477     s->current_picture = av_frame_alloc();
478     if (!s->mconly_picture || !s->current_picture)
479         goto fail;
480
481     return 0;
482 fail:
483     return AVERROR(ENOMEM);
484 }
485
486 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
487     SnowContext *s = avctx->priv_data;
488     int plane_index, level, orientation;
489     int ret, emu_buf_size;
490
491     if(!s->scratchbuf) {
492         if ((ret = ff_get_buffer(s->avctx, s->mconly_picture,
493                                  AV_GET_BUFFER_FLAG_REF)) < 0)
494             return ret;
495         FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256), 7*MB_SIZE, fail);
496         emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
497         FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
498     }
499
500     if(s->mconly_picture->format != avctx->pix_fmt) {
501         av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
502         return AVERROR_INVALIDDATA;
503     }
504
505     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
506         int w= s->avctx->width;
507         int h= s->avctx->height;
508
509         if(plane_index){
510             w>>= s->chroma_h_shift;
511             h>>= s->chroma_v_shift;
512         }
513         s->plane[plane_index].width = w;
514         s->plane[plane_index].height= h;
515
516         for(level=s->spatial_decomposition_count-1; level>=0; level--){
517             for(orientation=level ? 1 : 0; orientation<4; orientation++){
518                 SubBand *b= &s->plane[plane_index].band[level][orientation];
519
520                 b->buf= s->spatial_dwt_buffer;
521                 b->level= level;
522                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
523                 b->width = (w + !(orientation&1))>>1;
524                 b->height= (h + !(orientation>1))>>1;
525
526                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
527                 b->buf_x_offset = 0;
528                 b->buf_y_offset = 0;
529
530                 if(orientation&1){
531                     b->buf += (w+1)>>1;
532                     b->buf_x_offset = (w+1)>>1;
533                 }
534                 if(orientation>1){
535                     b->buf += b->stride>>1;
536                     b->buf_y_offset = b->stride_line >> 1;
537                 }
538                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
539
540                 if(level)
541                     b->parent= &s->plane[plane_index].band[level-1][orientation];
542                 //FIXME avoid this realloc
543                 av_freep(&b->x_coeff);
544                 b->x_coeff=av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff));
545                 if (!b->x_coeff)
546                     goto fail;
547             }
548             w= (w+1)>>1;
549             h= (h+1)>>1;
550         }
551     }
552
553     return 0;
554 fail:
555     return AVERROR(ENOMEM);
556 }
557
558 #define USE_HALFPEL_PLANE 0
559
560 static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
561     int p,x,y;
562
563     for(p=0; p < s->nb_planes; p++){
564         int is_chroma= !!p;
565         int w= is_chroma ? s->avctx->width >>s->chroma_h_shift : s->avctx->width;
566         int h= is_chroma ? s->avctx->height>>s->chroma_v_shift : s->avctx->height;
567         int ls= frame->linesize[p];
568         uint8_t *src= frame->data[p];
569
570         halfpel[1][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
571         halfpel[2][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
572         halfpel[3][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
573         if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p])
574             return AVERROR(ENOMEM);
575
576         halfpel[0][p]= src;
577         for(y=0; y<h; y++){
578             for(x=0; x<w; x++){
579                 int i= y*ls + x;
580
581                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
582             }
583         }
584         for(y=0; y<h; y++){
585             for(x=0; x<w; x++){
586                 int i= y*ls + x;
587
588                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
589             }
590         }
591         src= halfpel[1][p];
592         for(y=0; y<h; y++){
593             for(x=0; x<w; x++){
594                 int i= y*ls + x;
595
596                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
597             }
598         }
599
600 //FIXME border!
601     }
602     return 0;
603 }
604
605 void ff_snow_release_buffer(AVCodecContext *avctx)
606 {
607     SnowContext *s = avctx->priv_data;
608     int i;
609
610     if(s->last_picture[s->max_ref_frames-1]->data[0]){
611         av_frame_unref(s->last_picture[s->max_ref_frames-1]);
612         for(i=0; i<9; i++)
613             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
614                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3]));
615     }
616 }
617
618 int ff_snow_frame_start(SnowContext *s){
619    AVFrame *tmp;
620    int i, ret;
621    int w= s->avctx->width; //FIXME round up to x16 ?
622    int h= s->avctx->height;
623
624     if (s->current_picture->data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
625         s->dsp.draw_edges(s->current_picture->data[0],
626                           s->current_picture->linesize[0], w   , h   ,
627                           EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
628         if (s->current_picture->data[2]) {
629             s->dsp.draw_edges(s->current_picture->data[1],
630                             s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
631                             EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
632             s->dsp.draw_edges(s->current_picture->data[2],
633                             s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
634                             EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
635         }
636     }
637
638     ff_snow_release_buffer(s->avctx);
639
640     tmp= s->last_picture[s->max_ref_frames-1];
641     for(i=s->max_ref_frames-1; i>0; i--)
642         s->last_picture[i] = s->last_picture[i-1];
643     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
644     if(USE_HALFPEL_PLANE && s->current_picture->data[0]) {
645         if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0)
646             return ret;
647     }
648     s->last_picture[0] = s->current_picture;
649     s->current_picture = tmp;
650
651     if(s->keyframe){
652         s->ref_frames= 0;
653     }else{
654         int i;
655         for(i=0; i<s->max_ref_frames && s->last_picture[i]->data[0]; i++)
656             if(i && s->last_picture[i-1]->key_frame)
657                 break;
658         s->ref_frames= i;
659         if(s->ref_frames==0){
660             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
661             return -1;
662         }
663     }
664
665     if ((ret = ff_get_buffer(s->avctx, s->current_picture, AV_GET_BUFFER_FLAG_REF)) < 0)
666         return ret;
667
668     s->current_picture->key_frame= s->keyframe;
669
670     return 0;
671 }
672
673 av_cold void ff_snow_common_end(SnowContext *s)
674 {
675     int plane_index, level, orientation, i;
676
677     av_freep(&s->spatial_dwt_buffer);
678     av_freep(&s->temp_dwt_buffer);
679     av_freep(&s->spatial_idwt_buffer);
680     av_freep(&s->temp_idwt_buffer);
681     av_freep(&s->run_buffer);
682
683     s->m.me.temp= NULL;
684     av_freep(&s->m.me.scratchpad);
685     av_freep(&s->m.me.map);
686     av_freep(&s->m.me.score_map);
687     av_freep(&s->m.obmc_scratchpad);
688
689     av_freep(&s->block);
690     av_freep(&s->scratchbuf);
691     av_freep(&s->emu_edge_buffer);
692
693     for(i=0; i<MAX_REF_FRAMES; i++){
694         av_freep(&s->ref_mvs[i]);
695         av_freep(&s->ref_scores[i]);
696         if(s->last_picture[i]->data[0]) {
697             av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
698         }
699         av_frame_free(&s->last_picture[i]);
700     }
701
702     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
703         for(level=s->spatial_decomposition_count-1; level>=0; level--){
704             for(orientation=level ? 1 : 0; orientation<4; orientation++){
705                 SubBand *b= &s->plane[plane_index].band[level][orientation];
706
707                 av_freep(&b->x_coeff);
708             }
709         }
710     }
711     av_frame_free(&s->mconly_picture);
712     av_frame_free(&s->current_picture);
713 }