]> git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
ircamenc: 10l do not use avio_skip()
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "dsputil.h"
26 #include "dwt.h"
27 #include "snow.h"
28 #include "snowdata.h"
29
30 #include "rangecoder.h"
31 #include "mathops.h"
32 #include "h263.h"
33
34
35 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
36                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
37     int y, x;
38     IDWTELEM * dst;
39     for(y=0; y<b_h; y++){
40         //FIXME ugly misuse of obmc_stride
41         const uint8_t *obmc1= obmc + y*obmc_stride;
42         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
43         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
44         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
45         dst = slice_buffer_get_line(sb, src_y + y);
46         for(x=0; x<b_w; x++){
47             int v=   obmc1[x] * block[3][x + y*src_stride]
48                     +obmc2[x] * block[2][x + y*src_stride]
49                     +obmc3[x] * block[1][x + y*src_stride]
50                     +obmc4[x] * block[0][x + y*src_stride];
51
52             v <<= 8 - LOG2_OBMC_MAX;
53             if(FRAC_BITS != 8){
54                 v >>= 8 - FRAC_BITS;
55             }
56             if(add){
57                 v += dst[x + src_x];
58                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
59                 if(v&(~255)) v= ~(v>>31);
60                 dst8[x + y*src_stride] = v;
61             }else{
62                 dst[x + src_x] -= v;
63             }
64         }
65     }
66 }
67
68 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
69     int plane_index, level, orientation;
70
71     for(plane_index=0; plane_index<3; plane_index++){
72         for(level=0; level<MAX_DECOMPOSITIONS; level++){
73             for(orientation=level ? 1:0; orientation<4; orientation++){
74                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
75             }
76         }
77     }
78     memset(s->header_state, MID_STATE, sizeof(s->header_state));
79     memset(s->block_state, MID_STATE, sizeof(s->block_state));
80 }
81
82 int ff_snow_alloc_blocks(SnowContext *s){
83     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
84     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
85
86     s->b_width = w;
87     s->b_height= h;
88
89     av_free(s->block);
90     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
91     return 0;
92 }
93
94 static void init_qexp(void){
95     int i;
96     double v=128;
97
98     for(i=0; i<QROOT; i++){
99         ff_qexp[i]= lrintf(v);
100         v *= pow(2, 1.0 / QROOT);
101     }
102 }
103 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
104     static const uint8_t weight[64]={
105     8,7,6,5,4,3,2,1,
106     7,7,0,0,0,0,0,1,
107     6,0,6,0,0,0,2,0,
108     5,0,0,5,0,3,0,0,
109     4,0,0,0,4,0,0,0,
110     3,0,0,5,0,3,0,0,
111     2,0,6,0,0,0,2,0,
112     1,7,0,0,0,0,0,1,
113     };
114
115     static const uint8_t brane[256]={
116     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
117     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
118     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
119     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
120     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
121     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
122     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
123     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
124     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
125     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
126     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
127     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
128     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
129     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
130     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
131     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
132     };
133
134     static const uint8_t needs[16]={
135     0,1,0,0,
136     2,4,2,0,
137     0,1,0,0,
138     15
139     };
140
141     int x, y, b, r, l;
142     int16_t tmpIt   [64*(32+HTAPS_MAX)];
143     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
144     int16_t *tmpI= tmpIt;
145     uint8_t *tmp2= tmp2t[0];
146     const uint8_t *hpel[11];
147     av_assert2(dx<16 && dy<16);
148     r= brane[dx + 16*dy]&15;
149     l= brane[dx + 16*dy]>>4;
150
151     b= needs[l] | needs[r];
152     if(p && !p->diag_mc)
153         b= 15;
154
155     if(b&5){
156         for(y=0; y < b_h+HTAPS_MAX-1; y++){
157             for(x=0; x < b_w; x++){
158                 int a_1=src[x + HTAPS_MAX/2-4];
159                 int a0= src[x + HTAPS_MAX/2-3];
160                 int a1= src[x + HTAPS_MAX/2-2];
161                 int a2= src[x + HTAPS_MAX/2-1];
162                 int a3= src[x + HTAPS_MAX/2+0];
163                 int a4= src[x + HTAPS_MAX/2+1];
164                 int a5= src[x + HTAPS_MAX/2+2];
165                 int a6= src[x + HTAPS_MAX/2+3];
166                 int am=0;
167                 if(!p || p->fast_mc){
168                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
169                     tmpI[x]= am;
170                     am= (am+16)>>5;
171                 }else{
172                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
173                     tmpI[x]= am;
174                     am= (am+32)>>6;
175                 }
176
177                 if(am&(~255)) am= ~(am>>31);
178                 tmp2[x]= am;
179             }
180             tmpI+= 64;
181             tmp2+= 64;
182             src += stride;
183         }
184         src -= stride*y;
185     }
186     src += HTAPS_MAX/2 - 1;
187     tmp2= tmp2t[1];
188
189     if(b&2){
190         for(y=0; y < b_h; y++){
191             for(x=0; x < b_w+1; x++){
192                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
193                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
194                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
195                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
196                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
197                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
198                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
199                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
200                 int am=0;
201                 if(!p || p->fast_mc)
202                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
203                 else
204                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
205
206                 if(am&(~255)) am= ~(am>>31);
207                 tmp2[x]= am;
208             }
209             src += stride;
210             tmp2+= 64;
211         }
212         src -= stride*y;
213     }
214     src += stride*(HTAPS_MAX/2 - 1);
215     tmp2= tmp2t[2];
216     tmpI= tmpIt;
217     if(b&4){
218         for(y=0; y < b_h; y++){
219             for(x=0; x < b_w; x++){
220                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
221                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
222                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
223                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
224                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
225                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
226                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
227                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
228                 int am=0;
229                 if(!p || p->fast_mc)
230                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
231                 else
232                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
233                 if(am&(~255)) am= ~(am>>31);
234                 tmp2[x]= am;
235             }
236             tmpI+= 64;
237             tmp2+= 64;
238         }
239     }
240
241     hpel[ 0]= src;
242     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
243     hpel[ 2]= src + 1;
244
245     hpel[ 4]= tmp2t[1];
246     hpel[ 5]= tmp2t[2];
247     hpel[ 6]= tmp2t[1] + 1;
248
249     hpel[ 8]= src + stride;
250     hpel[ 9]= hpel[1] + 64;
251     hpel[10]= hpel[8] + 1;
252
253 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
254
255     if(b==15){
256         int dxy = dx / 8 + dy / 8 * 4;
257         const uint8_t *src1 = hpel[dxy    ];
258         const uint8_t *src2 = hpel[dxy + 1];
259         const uint8_t *src3 = hpel[dxy + 4];
260         const uint8_t *src4 = hpel[dxy + 5];
261         int stride1 = MC_STRIDE(dxy);
262         int stride2 = MC_STRIDE(dxy + 1);
263         int stride3 = MC_STRIDE(dxy + 4);
264         int stride4 = MC_STRIDE(dxy + 5);
265         dx&=7;
266         dy&=7;
267         for(y=0; y < b_h; y++){
268             for(x=0; x < b_w; x++){
269                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
270                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
271             }
272             src1+=stride1;
273             src2+=stride2;
274             src3+=stride3;
275             src4+=stride4;
276             dst +=stride;
277         }
278     }else{
279         const uint8_t *src1= hpel[l];
280         const uint8_t *src2= hpel[r];
281         int stride1 = MC_STRIDE(l);
282         int stride2 = MC_STRIDE(r);
283         int a= weight[((dx&7) + (8*(dy&7)))];
284         int b= 8-a;
285         for(y=0; y < b_h; y++){
286             for(x=0; x < b_w; x++){
287                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
288             }
289             src1+=stride1;
290             src2+=stride2;
291             dst +=stride;
292         }
293     }
294 }
295
296 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
297     if(block->type & BLOCK_INTRA){
298         int x, y;
299         const unsigned color  = block->color[plane_index];
300         const unsigned color4 = color*0x01010101;
301         if(b_w==32){
302             for(y=0; y < b_h; y++){
303                 *(uint32_t*)&dst[0 + y*stride]= color4;
304                 *(uint32_t*)&dst[4 + y*stride]= color4;
305                 *(uint32_t*)&dst[8 + y*stride]= color4;
306                 *(uint32_t*)&dst[12+ y*stride]= color4;
307                 *(uint32_t*)&dst[16+ y*stride]= color4;
308                 *(uint32_t*)&dst[20+ y*stride]= color4;
309                 *(uint32_t*)&dst[24+ y*stride]= color4;
310                 *(uint32_t*)&dst[28+ y*stride]= color4;
311             }
312         }else if(b_w==16){
313             for(y=0; y < b_h; y++){
314                 *(uint32_t*)&dst[0 + y*stride]= color4;
315                 *(uint32_t*)&dst[4 + y*stride]= color4;
316                 *(uint32_t*)&dst[8 + y*stride]= color4;
317                 *(uint32_t*)&dst[12+ y*stride]= color4;
318             }
319         }else if(b_w==8){
320             for(y=0; y < b_h; y++){
321                 *(uint32_t*)&dst[0 + y*stride]= color4;
322                 *(uint32_t*)&dst[4 + y*stride]= color4;
323             }
324         }else if(b_w==4){
325             for(y=0; y < b_h; y++){
326                 *(uint32_t*)&dst[0 + y*stride]= color4;
327             }
328         }else{
329             for(y=0; y < b_h; y++){
330                 for(x=0; x < b_w; x++){
331                     dst[x + y*stride]= color;
332                 }
333             }
334         }
335     }else{
336         uint8_t *src= s->last_picture[block->ref].data[plane_index];
337         const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
338         int mx= block->mx*scale;
339         int my= block->my*scale;
340         const int dx= mx&15;
341         const int dy= my&15;
342         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
343         sx += (mx>>4) - (HTAPS_MAX/2-1);
344         sy += (my>>4) - (HTAPS_MAX/2-1);
345         src += sx + sy*stride;
346         if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
347            || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
348             s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
349             src= tmp + MB_SIZE;
350         }
351
352         av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
353
354 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
355 //        assert(!(b_w&(b_w-1)));
356         av_assert2(b_w>1 && b_h>1);
357         av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
358         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
359             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
360         else if(b_w==32){
361             int y;
362             for(y=0; y<b_h; y+=16){
363                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
364                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
365             }
366         }else if(b_w==b_h)
367             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
368         else if(b_w==2*b_h){
369             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
370             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
371         }else{
372             av_assert2(2*b_w==b_h);
373             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
374             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
375         }
376     }
377 }
378
379 #define mca(dx,dy,b_w)\
380 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
381     av_assert2(h==b_w);\
382     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
383 }
384
385 mca( 0, 0,16)
386 mca( 8, 0,16)
387 mca( 0, 8,16)
388 mca( 8, 8,16)
389 mca( 0, 0,8)
390 mca( 8, 0,8)
391 mca( 0, 8,8)
392 mca( 8, 8,8)
393
394 av_cold int ff_snow_common_init(AVCodecContext *avctx){
395     SnowContext *s = avctx->priv_data;
396     int width, height;
397     int i, j;
398
399     s->avctx= avctx;
400     s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
401
402     ff_dsputil_init(&s->dsp, avctx);
403     ff_dwt_init(&s->dwt);
404
405 #define mcf(dx,dy)\
406     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
407     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
408         s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
409     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
410     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
411         s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
412
413     mcf( 0, 0)
414     mcf( 4, 0)
415     mcf( 8, 0)
416     mcf(12, 0)
417     mcf( 0, 4)
418     mcf( 4, 4)
419     mcf( 8, 4)
420     mcf(12, 4)
421     mcf( 0, 8)
422     mcf( 4, 8)
423     mcf( 8, 8)
424     mcf(12, 8)
425     mcf( 0,12)
426     mcf( 4,12)
427     mcf( 8,12)
428     mcf(12,12)
429
430 #define mcfh(dx,dy)\
431     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
432     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
433         mc_block_hpel ## dx ## dy ## 16;\
434     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
435     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
436         mc_block_hpel ## dx ## dy ## 8;
437
438     mcfh(0, 0)
439     mcfh(8, 0)
440     mcfh(0, 8)
441     mcfh(8, 8)
442
443     init_qexp();
444
445 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
446
447     width= s->avctx->width;
448     height= s->avctx->height;
449
450     FF_ALLOCZ_OR_GOTO(avctx, s->spatial_idwt_buffer, width * height * sizeof(IDWTELEM), fail);
451     FF_ALLOCZ_OR_GOTO(avctx, s->spatial_dwt_buffer,  width * height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
452     FF_ALLOCZ_OR_GOTO(avctx, s->temp_dwt_buffer,     width * sizeof(DWTELEM),  fail);
453     FF_ALLOCZ_OR_GOTO(avctx, s->temp_idwt_buffer,    width * sizeof(IDWTELEM), fail);
454     FF_ALLOC_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1) * ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
455
456     for(i=0; i<MAX_REF_FRAMES; i++)
457         for(j=0; j<MAX_REF_FRAMES; j++)
458             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
459
460     return 0;
461 fail:
462     return AVERROR(ENOMEM);
463 }
464
465 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
466     SnowContext *s = avctx->priv_data;
467     int plane_index, level, orientation;
468     int ret, emu_buf_size;
469
470     if(!s->scratchbuf) {
471         if ((ret = s->avctx->get_buffer(s->avctx, &s->mconly_picture)) < 0) {
472             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
473             return ret;
474         }
475         FF_ALLOCZ_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture.linesize[0], 2*avctx->width+256)*7*MB_SIZE, fail);
476         emu_buf_size = FFMAX(s->mconly_picture.linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
477         FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
478     }
479
480     if(s->mconly_picture.format != avctx->pix_fmt) {
481         av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
482         return AVERROR_INVALIDDATA;
483     }
484
485     for(plane_index=0; plane_index<3; plane_index++){
486         int w= s->avctx->width;
487         int h= s->avctx->height;
488
489         if(plane_index){
490             w>>= s->chroma_h_shift;
491             h>>= s->chroma_v_shift;
492         }
493         s->plane[plane_index].width = w;
494         s->plane[plane_index].height= h;
495
496         for(level=s->spatial_decomposition_count-1; level>=0; level--){
497             for(orientation=level ? 1 : 0; orientation<4; orientation++){
498                 SubBand *b= &s->plane[plane_index].band[level][orientation];
499
500                 b->buf= s->spatial_dwt_buffer;
501                 b->level= level;
502                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
503                 b->width = (w + !(orientation&1))>>1;
504                 b->height= (h + !(orientation>1))>>1;
505
506                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
507                 b->buf_x_offset = 0;
508                 b->buf_y_offset = 0;
509
510                 if(orientation&1){
511                     b->buf += (w+1)>>1;
512                     b->buf_x_offset = (w+1)>>1;
513                 }
514                 if(orientation>1){
515                     b->buf += b->stride>>1;
516                     b->buf_y_offset = b->stride_line >> 1;
517                 }
518                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
519
520                 if(level)
521                     b->parent= &s->plane[plane_index].band[level-1][orientation];
522                 //FIXME avoid this realloc
523                 av_freep(&b->x_coeff);
524                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
525             }
526             w= (w+1)>>1;
527             h= (h+1)>>1;
528         }
529     }
530
531     return 0;
532 fail:
533     return AVERROR(ENOMEM);
534 }
535
536 #define USE_HALFPEL_PLANE 0
537
538 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
539     int p,x,y;
540
541     for(p=0; p<3; p++){
542         int is_chroma= !!p;
543         int w= is_chroma ? s->avctx->width >>s->chroma_h_shift : s->avctx->width;
544         int h= is_chroma ? s->avctx->height>>s->chroma_v_shift : s->avctx->height;
545         int ls= frame->linesize[p];
546         uint8_t *src= frame->data[p];
547
548         halfpel[1][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
549         halfpel[2][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
550         halfpel[3][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
551
552         halfpel[0][p]= src;
553         for(y=0; y<h; y++){
554             for(x=0; x<w; x++){
555                 int i= y*ls + x;
556
557                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
558             }
559         }
560         for(y=0; y<h; y++){
561             for(x=0; x<w; x++){
562                 int i= y*ls + x;
563
564                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
565             }
566         }
567         src= halfpel[1][p];
568         for(y=0; y<h; y++){
569             for(x=0; x<w; x++){
570                 int i= y*ls + x;
571
572                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
573             }
574         }
575
576 //FIXME border!
577     }
578 }
579
580 void ff_snow_release_buffer(AVCodecContext *avctx)
581 {
582     SnowContext *s = avctx->priv_data;
583     int i;
584
585     if(s->last_picture[s->max_ref_frames-1].data[0]){
586         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
587         for(i=0; i<9; i++)
588             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
589                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
590     }
591 }
592
593 int ff_snow_frame_start(SnowContext *s){
594    AVFrame tmp;
595    int w= s->avctx->width; //FIXME round up to x16 ?
596    int h= s->avctx->height;
597
598     if (s->current_picture.data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
599         s->dsp.draw_edges(s->current_picture.data[0],
600                           s->current_picture.linesize[0], w   , h   ,
601                           EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
602         s->dsp.draw_edges(s->current_picture.data[1],
603                           s->current_picture.linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
604                           EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
605         s->dsp.draw_edges(s->current_picture.data[2],
606                           s->current_picture.linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
607                           EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
608     }
609
610     ff_snow_release_buffer(s->avctx);
611
612     tmp= s->last_picture[s->max_ref_frames-1];
613     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
614     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
615     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
616         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
617     s->last_picture[0]= s->current_picture;
618     s->current_picture= tmp;
619
620     if(s->keyframe){
621         s->ref_frames= 0;
622     }else{
623         int i;
624         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
625             if(i && s->last_picture[i-1].key_frame)
626                 break;
627         s->ref_frames= i;
628         if(s->ref_frames==0){
629             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
630             return -1;
631         }
632     }
633
634     s->current_picture.reference= 3;
635     if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
636         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
637         return -1;
638     }
639
640     s->current_picture.key_frame= s->keyframe;
641
642     return 0;
643 }
644
645 av_cold void ff_snow_common_end(SnowContext *s)
646 {
647     int plane_index, level, orientation, i;
648
649     av_freep(&s->spatial_dwt_buffer);
650     av_freep(&s->temp_dwt_buffer);
651     av_freep(&s->spatial_idwt_buffer);
652     av_freep(&s->temp_idwt_buffer);
653     av_freep(&s->run_buffer);
654
655     s->m.me.temp= NULL;
656     av_freep(&s->m.me.scratchpad);
657     av_freep(&s->m.me.map);
658     av_freep(&s->m.me.score_map);
659     av_freep(&s->m.obmc_scratchpad);
660
661     av_freep(&s->block);
662     av_freep(&s->scratchbuf);
663     av_freep(&s->emu_edge_buffer);
664
665     for(i=0; i<MAX_REF_FRAMES; i++){
666         av_freep(&s->ref_mvs[i]);
667         av_freep(&s->ref_scores[i]);
668         if(s->last_picture[i].data[0]) {
669             av_assert0(s->last_picture[i].data[0] != s->current_picture.data[0]);
670             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
671         }
672     }
673
674     for(plane_index=0; plane_index<3; plane_index++){
675         for(level=s->spatial_decomposition_count-1; level>=0; level--){
676             for(orientation=level ? 1 : 0; orientation<4; orientation++){
677                 SubBand *b= &s->plane[plane_index].band[level][orientation];
678
679                 av_freep(&b->x_coeff);
680             }
681         }
682     }
683     if (s->mconly_picture.data[0])
684         s->avctx->release_buffer(s->avctx, &s->mconly_picture);
685     if (s->current_picture.data[0])
686         s->avctx->release_buffer(s->avctx, &s->current_picture);
687 }