]> git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
avformat/mpegtsenc: reindent the last commit
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "me_cmp.h"
26 #include "snow_dwt.h"
27 #include "internal.h"
28 #include "snow.h"
29 #include "snowdata.h"
30
31 #include "rangecoder.h"
32 #include "mathops.h"
33 #include "h263.h"
34
35
36 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
37                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
38     int y, x;
39     IDWTELEM * dst;
40     for(y=0; y<b_h; y++){
41         //FIXME ugly misuse of obmc_stride
42         const uint8_t *obmc1= obmc + y*obmc_stride;
43         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
44         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
45         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
46         dst = slice_buffer_get_line(sb, src_y + y);
47         for(x=0; x<b_w; x++){
48             int v=   obmc1[x] * block[3][x + y*src_stride]
49                     +obmc2[x] * block[2][x + y*src_stride]
50                     +obmc3[x] * block[1][x + y*src_stride]
51                     +obmc4[x] * block[0][x + y*src_stride];
52
53             v <<= 8 - LOG2_OBMC_MAX;
54             if(FRAC_BITS != 8){
55                 v >>= 8 - FRAC_BITS;
56             }
57             if(add){
58                 v += dst[x + src_x];
59                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
60                 if(v&(~255)) v= ~(v>>31);
61                 dst8[x + y*src_stride] = v;
62             }else{
63                 dst[x + src_x] -= v;
64             }
65         }
66     }
67 }
68
69 int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
70 {
71     int ret, i;
72     int edges_needed = av_codec_is_encoder(s->avctx->codec);
73
74     frame->width  = s->avctx->width ;
75     frame->height = s->avctx->height;
76     if (edges_needed) {
77         frame->width  += 2 * EDGE_WIDTH;
78         frame->height += 2 * EDGE_WIDTH;
79     }
80     if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
81         return ret;
82     if (edges_needed) {
83         for (i = 0; frame->data[i]; i++) {
84             int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) *
85                             frame->linesize[i] +
86                             (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0));
87             frame->data[i] += offset;
88         }
89         frame->width  = s->avctx->width;
90         frame->height = s->avctx->height;
91     }
92
93     return 0;
94 }
95
96 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
97     int plane_index, level, orientation;
98
99     for(plane_index=0; plane_index<3; plane_index++){
100         for(level=0; level<MAX_DECOMPOSITIONS; level++){
101             for(orientation=level ? 1:0; orientation<4; orientation++){
102                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
103             }
104         }
105     }
106     memset(s->header_state, MID_STATE, sizeof(s->header_state));
107     memset(s->block_state, MID_STATE, sizeof(s->block_state));
108 }
109
110 int ff_snow_alloc_blocks(SnowContext *s){
111     int w= AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
112     int h= AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
113
114     s->b_width = w;
115     s->b_height= h;
116
117     av_free(s->block);
118     s->block= av_mallocz_array(w * h,  sizeof(BlockNode) << (s->block_max_depth*2));
119     if (!s->block)
120         return AVERROR(ENOMEM);
121
122     return 0;
123 }
124
125 static av_cold void init_qexp(void){
126     int i;
127     double v=128;
128
129     for(i=0; i<QROOT; i++){
130         ff_qexp[i]= lrintf(v);
131         v *= pow(2, 1.0 / QROOT);
132     }
133 }
134 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
135     static const uint8_t weight[64]={
136     8,7,6,5,4,3,2,1,
137     7,7,0,0,0,0,0,1,
138     6,0,6,0,0,0,2,0,
139     5,0,0,5,0,3,0,0,
140     4,0,0,0,4,0,0,0,
141     3,0,0,5,0,3,0,0,
142     2,0,6,0,0,0,2,0,
143     1,7,0,0,0,0,0,1,
144     };
145
146     static const uint8_t brane[256]={
147     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
148     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
149     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
150     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
151     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
152     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
153     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
154     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
155     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
156     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
157     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
158     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
159     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
160     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
161     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
162     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
163     };
164
165     static const uint8_t needs[16]={
166     0,1,0,0,
167     2,4,2,0,
168     0,1,0,0,
169     15
170     };
171
172     int x, y, b, r, l;
173     int16_t tmpIt   [64*(32+HTAPS_MAX)];
174     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
175     int16_t *tmpI= tmpIt;
176     uint8_t *tmp2= tmp2t[0];
177     const uint8_t *hpel[11];
178     av_assert2(dx<16 && dy<16);
179     r= brane[dx + 16*dy]&15;
180     l= brane[dx + 16*dy]>>4;
181
182     b= needs[l] | needs[r];
183     if(p && !p->diag_mc)
184         b= 15;
185
186     if(b&5){
187         for(y=0; y < b_h+HTAPS_MAX-1; y++){
188             for(x=0; x < b_w; x++){
189                 int a_1=src[x + HTAPS_MAX/2-4];
190                 int a0= src[x + HTAPS_MAX/2-3];
191                 int a1= src[x + HTAPS_MAX/2-2];
192                 int a2= src[x + HTAPS_MAX/2-1];
193                 int a3= src[x + HTAPS_MAX/2+0];
194                 int a4= src[x + HTAPS_MAX/2+1];
195                 int a5= src[x + HTAPS_MAX/2+2];
196                 int a6= src[x + HTAPS_MAX/2+3];
197                 int am=0;
198                 if(!p || p->fast_mc){
199                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
200                     tmpI[x]= am;
201                     am= (am+16)>>5;
202                 }else{
203                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
204                     tmpI[x]= am;
205                     am= (am+32)>>6;
206                 }
207
208                 if(am&(~255)) am= ~(am>>31);
209                 tmp2[x]= am;
210             }
211             tmpI+= 64;
212             tmp2+= 64;
213             src += stride;
214         }
215         src -= stride*y;
216     }
217     src += HTAPS_MAX/2 - 1;
218     tmp2= tmp2t[1];
219
220     if(b&2){
221         for(y=0; y < b_h; y++){
222             for(x=0; x < b_w+1; x++){
223                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
224                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
225                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
226                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
227                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
228                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
229                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
230                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
231                 int am=0;
232                 if(!p || p->fast_mc)
233                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
234                 else
235                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
236
237                 if(am&(~255)) am= ~(am>>31);
238                 tmp2[x]= am;
239             }
240             src += stride;
241             tmp2+= 64;
242         }
243         src -= stride*y;
244     }
245     src += stride*(HTAPS_MAX/2 - 1);
246     tmp2= tmp2t[2];
247     tmpI= tmpIt;
248     if(b&4){
249         for(y=0; y < b_h; y++){
250             for(x=0; x < b_w; x++){
251                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
252                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
253                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
254                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
255                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
256                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
257                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
258                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
259                 int am=0;
260                 if(!p || p->fast_mc)
261                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
262                 else
263                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
264                 if(am&(~255)) am= ~(am>>31);
265                 tmp2[x]= am;
266             }
267             tmpI+= 64;
268             tmp2+= 64;
269         }
270     }
271
272     hpel[ 0]= src;
273     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
274     hpel[ 2]= src + 1;
275
276     hpel[ 4]= tmp2t[1];
277     hpel[ 5]= tmp2t[2];
278     hpel[ 6]= tmp2t[1] + 1;
279
280     hpel[ 8]= src + stride;
281     hpel[ 9]= hpel[1] + 64;
282     hpel[10]= hpel[8] + 1;
283
284 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
285
286     if(b==15){
287         int dxy = dx / 8 + dy / 8 * 4;
288         const uint8_t *src1 = hpel[dxy    ];
289         const uint8_t *src2 = hpel[dxy + 1];
290         const uint8_t *src3 = hpel[dxy + 4];
291         const uint8_t *src4 = hpel[dxy + 5];
292         int stride1 = MC_STRIDE(dxy);
293         int stride2 = MC_STRIDE(dxy + 1);
294         int stride3 = MC_STRIDE(dxy + 4);
295         int stride4 = MC_STRIDE(dxy + 5);
296         dx&=7;
297         dy&=7;
298         for(y=0; y < b_h; y++){
299             for(x=0; x < b_w; x++){
300                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
301                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
302             }
303             src1+=stride1;
304             src2+=stride2;
305             src3+=stride3;
306             src4+=stride4;
307             dst +=stride;
308         }
309     }else{
310         const uint8_t *src1= hpel[l];
311         const uint8_t *src2= hpel[r];
312         int stride1 = MC_STRIDE(l);
313         int stride2 = MC_STRIDE(r);
314         int a= weight[((dx&7) + (8*(dy&7)))];
315         int b= 8-a;
316         for(y=0; y < b_h; y++){
317             for(x=0; x < b_w; x++){
318                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
319             }
320             src1+=stride1;
321             src2+=stride2;
322             dst +=stride;
323         }
324     }
325 }
326
327 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
328     if(block->type & BLOCK_INTRA){
329         int x, y;
330         const unsigned color  = block->color[plane_index];
331         const unsigned color4 = color*0x01010101;
332         if(b_w==32){
333             for(y=0; y < b_h; y++){
334                 *(uint32_t*)&dst[0 + y*stride]= color4;
335                 *(uint32_t*)&dst[4 + y*stride]= color4;
336                 *(uint32_t*)&dst[8 + y*stride]= color4;
337                 *(uint32_t*)&dst[12+ y*stride]= color4;
338                 *(uint32_t*)&dst[16+ y*stride]= color4;
339                 *(uint32_t*)&dst[20+ y*stride]= color4;
340                 *(uint32_t*)&dst[24+ y*stride]= color4;
341                 *(uint32_t*)&dst[28+ y*stride]= color4;
342             }
343         }else if(b_w==16){
344             for(y=0; y < b_h; y++){
345                 *(uint32_t*)&dst[0 + y*stride]= color4;
346                 *(uint32_t*)&dst[4 + y*stride]= color4;
347                 *(uint32_t*)&dst[8 + y*stride]= color4;
348                 *(uint32_t*)&dst[12+ y*stride]= color4;
349             }
350         }else if(b_w==8){
351             for(y=0; y < b_h; y++){
352                 *(uint32_t*)&dst[0 + y*stride]= color4;
353                 *(uint32_t*)&dst[4 + y*stride]= color4;
354             }
355         }else if(b_w==4){
356             for(y=0; y < b_h; y++){
357                 *(uint32_t*)&dst[0 + y*stride]= color4;
358             }
359         }else{
360             for(y=0; y < b_h; y++){
361                 for(x=0; x < b_w; x++){
362                     dst[x + y*stride]= color;
363                 }
364             }
365         }
366     }else{
367         uint8_t *src= s->last_picture[block->ref]->data[plane_index];
368         const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
369         int mx= block->mx*scale;
370         int my= block->my*scale;
371         const int dx= mx&15;
372         const int dy= my&15;
373         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
374         sx += (mx>>4) - (HTAPS_MAX/2-1);
375         sy += (my>>4) - (HTAPS_MAX/2-1);
376         src += sx + sy*stride;
377         if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
378            || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
379             s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
380                                      stride, stride,
381                                      b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
382                                      sx, sy, w, h);
383             src= tmp + MB_SIZE;
384         }
385
386         av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
387
388         av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
389         if(    (dx&3) || (dy&3)
390             || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
391             || (b_w&(b_w-1))
392             || b_w == 1
393             || b_h == 1
394             || !s->plane[plane_index].fast_mc )
395             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
396         else if(b_w==32){
397             int y;
398             for(y=0; y<b_h; y+=16){
399                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
400                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
401             }
402         }else if(b_w==b_h)
403             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
404         else if(b_w==2*b_h){
405             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
406             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
407         }else{
408             av_assert2(2*b_w==b_h);
409             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
410             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
411         }
412     }
413 }
414
415 #define mca(dx,dy,b_w)\
416 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
417     av_assert2(h==b_w);\
418     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
419 }
420
421 mca( 0, 0,16)
422 mca( 8, 0,16)
423 mca( 0, 8,16)
424 mca( 8, 8,16)
425 mca( 0, 0,8)
426 mca( 8, 0,8)
427 mca( 0, 8,8)
428 mca( 8, 8,8)
429
430 av_cold int ff_snow_common_init(AVCodecContext *avctx){
431     SnowContext *s = avctx->priv_data;
432     int width, height;
433     int i, j;
434
435     s->avctx= avctx;
436     s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
437     s->spatial_decomposition_count = 1;
438
439     ff_me_cmp_init(&s->mecc, avctx);
440     ff_hpeldsp_init(&s->hdsp, avctx->flags);
441     ff_videodsp_init(&s->vdsp, 8);
442     ff_dwt_init(&s->dwt);
443     ff_h264qpel_init(&s->h264qpel, 8);
444
445 #define mcf(dx,dy)\
446     s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
447     s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
448         s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
449     s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
450     s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
451         s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
452
453     mcf( 0, 0)
454     mcf( 4, 0)
455     mcf( 8, 0)
456     mcf(12, 0)
457     mcf( 0, 4)
458     mcf( 4, 4)
459     mcf( 8, 4)
460     mcf(12, 4)
461     mcf( 0, 8)
462     mcf( 4, 8)
463     mcf( 8, 8)
464     mcf(12, 8)
465     mcf( 0,12)
466     mcf( 4,12)
467     mcf( 8,12)
468     mcf(12,12)
469
470 #define mcfh(dx,dy)\
471     s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
472     s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
473         mc_block_hpel ## dx ## dy ## 16;\
474     s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
475     s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
476         mc_block_hpel ## dx ## dy ## 8;
477
478     mcfh(0, 0)
479     mcfh(8, 0)
480     mcfh(0, 8)
481     mcfh(8, 8)
482
483     init_qexp();
484
485 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
486
487     width= s->avctx->width;
488     height= s->avctx->height;
489
490     if (!FF_ALLOCZ_TYPED_ARRAY(s->spatial_idwt_buffer, width * height) ||
491         !FF_ALLOCZ_TYPED_ARRAY(s->spatial_dwt_buffer,  width * height) ||  //FIXME this does not belong here
492         !FF_ALLOCZ_TYPED_ARRAY(s->temp_dwt_buffer,     width)          ||
493         !FF_ALLOCZ_TYPED_ARRAY(s->temp_idwt_buffer,    width)          ||
494         !FF_ALLOCZ_TYPED_ARRAY(s->run_buffer, ((width + 1) >> 1) * ((height + 1) >> 1)))
495         return AVERROR(ENOMEM);
496
497     for(i=0; i<MAX_REF_FRAMES; i++) {
498         for(j=0; j<MAX_REF_FRAMES; j++)
499             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
500         s->last_picture[i] = av_frame_alloc();
501         if (!s->last_picture[i])
502             return AVERROR(ENOMEM);
503     }
504
505     s->mconly_picture = av_frame_alloc();
506     s->current_picture = av_frame_alloc();
507     if (!s->mconly_picture || !s->current_picture)
508         return AVERROR(ENOMEM);
509
510     return 0;
511 }
512
513 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
514     SnowContext *s = avctx->priv_data;
515     int plane_index, level, orientation;
516     int ret, emu_buf_size;
517
518     if(!s->scratchbuf) {
519         if ((ret = ff_get_buffer(s->avctx, s->mconly_picture,
520                                  AV_GET_BUFFER_FLAG_REF)) < 0)
521             return ret;
522         emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
523         if (!FF_ALLOCZ_TYPED_ARRAY(s->scratchbuf,      FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * 7 * MB_SIZE) ||
524             !FF_ALLOCZ_TYPED_ARRAY(s->emu_edge_buffer, emu_buf_size))
525             return AVERROR(ENOMEM);
526     }
527
528     if(s->mconly_picture->format != avctx->pix_fmt) {
529         av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
530         return AVERROR_INVALIDDATA;
531     }
532
533     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
534         int w= s->avctx->width;
535         int h= s->avctx->height;
536
537         if(plane_index){
538             w = AV_CEIL_RSHIFT(w, s->chroma_h_shift);
539             h = AV_CEIL_RSHIFT(h, s->chroma_v_shift);
540         }
541         s->plane[plane_index].width = w;
542         s->plane[plane_index].height= h;
543
544         for(level=s->spatial_decomposition_count-1; level>=0; level--){
545             for(orientation=level ? 1 : 0; orientation<4; orientation++){
546                 SubBand *b= &s->plane[plane_index].band[level][orientation];
547
548                 b->buf= s->spatial_dwt_buffer;
549                 b->level= level;
550                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
551                 b->width = (w + !(orientation&1))>>1;
552                 b->height= (h + !(orientation>1))>>1;
553
554                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
555                 b->buf_x_offset = 0;
556                 b->buf_y_offset = 0;
557
558                 if(orientation&1){
559                     b->buf += (w+1)>>1;
560                     b->buf_x_offset = (w+1)>>1;
561                 }
562                 if(orientation>1){
563                     b->buf += b->stride>>1;
564                     b->buf_y_offset = b->stride_line >> 1;
565                 }
566                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
567
568                 if(level)
569                     b->parent= &s->plane[plane_index].band[level-1][orientation];
570                 //FIXME avoid this realloc
571                 av_freep(&b->x_coeff);
572                 b->x_coeff=av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff));
573                 if (!b->x_coeff)
574                     return AVERROR(ENOMEM);
575             }
576             w= (w+1)>>1;
577             h= (h+1)>>1;
578         }
579     }
580
581     return 0;
582 }
583
584 #define USE_HALFPEL_PLANE 0
585
586 static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
587     int p,x,y;
588
589     for(p=0; p < s->nb_planes; p++){
590         int is_chroma= !!p;
591         int w= is_chroma ? AV_CEIL_RSHIFT(s->avctx->width,  s->chroma_h_shift) : s->avctx->width;
592         int h= is_chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
593         int ls= frame->linesize[p];
594         uint8_t *src= frame->data[p];
595
596         halfpel[1][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
597         halfpel[2][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
598         halfpel[3][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
599         if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) {
600             av_freep(&halfpel[1][p]);
601             av_freep(&halfpel[2][p]);
602             av_freep(&halfpel[3][p]);
603             return AVERROR(ENOMEM);
604         }
605         halfpel[1][p] += EDGE_WIDTH * (1 + ls);
606         halfpel[2][p] += EDGE_WIDTH * (1 + ls);
607         halfpel[3][p] += EDGE_WIDTH * (1 + ls);
608
609         halfpel[0][p]= src;
610         for(y=0; y<h; y++){
611             for(x=0; x<w; x++){
612                 int i= y*ls + x;
613
614                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
615             }
616         }
617         for(y=0; y<h; y++){
618             for(x=0; x<w; x++){
619                 int i= y*ls + x;
620
621                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
622             }
623         }
624         src= halfpel[1][p];
625         for(y=0; y<h; y++){
626             for(x=0; x<w; x++){
627                 int i= y*ls + x;
628
629                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
630             }
631         }
632
633 //FIXME border!
634     }
635     return 0;
636 }
637
638 void ff_snow_release_buffer(AVCodecContext *avctx)
639 {
640     SnowContext *s = avctx->priv_data;
641     int i;
642
643     if(s->last_picture[s->max_ref_frames-1]->data[0]){
644         av_frame_unref(s->last_picture[s->max_ref_frames-1]);
645         for(i=0; i<9; i++)
646             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) {
647                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3]));
648                 s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] = NULL;
649             }
650     }
651 }
652
653 int ff_snow_frame_start(SnowContext *s){
654    AVFrame *tmp;
655    int i, ret;
656
657     ff_snow_release_buffer(s->avctx);
658
659     tmp= s->last_picture[s->max_ref_frames-1];
660     for(i=s->max_ref_frames-1; i>0; i--)
661         s->last_picture[i] = s->last_picture[i-1];
662     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
663     if(USE_HALFPEL_PLANE && s->current_picture->data[0]) {
664         if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0)
665             return ret;
666     }
667     s->last_picture[0] = s->current_picture;
668     s->current_picture = tmp;
669
670     if(s->keyframe){
671         s->ref_frames= 0;
672     }else{
673         int i;
674         for(i=0; i<s->max_ref_frames && s->last_picture[i]->data[0]; i++)
675             if(i && s->last_picture[i-1]->key_frame)
676                 break;
677         s->ref_frames= i;
678         if(s->ref_frames==0){
679             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
680             return AVERROR_INVALIDDATA;
681         }
682     }
683     if ((ret = ff_snow_get_buffer(s, s->current_picture)) < 0)
684         return ret;
685
686     s->current_picture->key_frame= s->keyframe;
687
688     return 0;
689 }
690
691 av_cold void ff_snow_common_end(SnowContext *s)
692 {
693     int plane_index, level, orientation, i;
694
695     av_freep(&s->spatial_dwt_buffer);
696     av_freep(&s->temp_dwt_buffer);
697     av_freep(&s->spatial_idwt_buffer);
698     av_freep(&s->temp_idwt_buffer);
699     av_freep(&s->run_buffer);
700
701     s->m.me.temp= NULL;
702     av_freep(&s->m.me.scratchpad);
703     av_freep(&s->m.me.map);
704     av_freep(&s->m.me.score_map);
705     av_freep(&s->m.sc.obmc_scratchpad);
706
707     av_freep(&s->block);
708     av_freep(&s->scratchbuf);
709     av_freep(&s->emu_edge_buffer);
710
711     for(i=0; i<MAX_REF_FRAMES; i++){
712         av_freep(&s->ref_mvs[i]);
713         av_freep(&s->ref_scores[i]);
714         if(s->last_picture[i] && s->last_picture[i]->data[0]) {
715             av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
716         }
717         av_frame_free(&s->last_picture[i]);
718     }
719
720     for(plane_index=0; plane_index < MAX_PLANES; plane_index++){
721         for(level=MAX_DECOMPOSITIONS-1; level>=0; level--){
722             for(orientation=level ? 1 : 0; orientation<4; orientation++){
723                 SubBand *b= &s->plane[plane_index].band[level][orientation];
724
725                 av_freep(&b->x_coeff);
726             }
727         }
728     }
729     av_frame_free(&s->mconly_picture);
730     av_frame_free(&s->current_picture);
731 }