]> git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
Merge commit '7650caf013f45ebebf128855735a0c6350836ea4'
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "me_cmp.h"
26 #include "snow_dwt.h"
27 #include "internal.h"
28 #include "snow.h"
29 #include "snowdata.h"
30
31 #include "rangecoder.h"
32 #include "mathops.h"
33 #include "h263.h"
34
35
36 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
37                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
38     int y, x;
39     IDWTELEM * dst;
40     for(y=0; y<b_h; y++){
41         //FIXME ugly misuse of obmc_stride
42         const uint8_t *obmc1= obmc + y*obmc_stride;
43         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
44         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
45         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
46         dst = slice_buffer_get_line(sb, src_y + y);
47         for(x=0; x<b_w; x++){
48             int v=   obmc1[x] * block[3][x + y*src_stride]
49                     +obmc2[x] * block[2][x + y*src_stride]
50                     +obmc3[x] * block[1][x + y*src_stride]
51                     +obmc4[x] * block[0][x + y*src_stride];
52
53             v <<= 8 - LOG2_OBMC_MAX;
54             if(FRAC_BITS != 8){
55                 v >>= 8 - FRAC_BITS;
56             }
57             if(add){
58                 v += dst[x + src_x];
59                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
60                 if(v&(~255)) v= ~(v>>31);
61                 dst8[x + y*src_stride] = v;
62             }else{
63                 dst[x + src_x] -= v;
64             }
65         }
66     }
67 }
68
69 int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
70 {
71     int ret, i;
72     int edges_needed = av_codec_is_encoder(s->avctx->codec);
73
74     frame->width  = s->avctx->width ;
75     frame->height = s->avctx->height;
76     if (edges_needed) {
77         frame->width  += 2 * EDGE_WIDTH;
78         frame->height += 2 * EDGE_WIDTH;
79     }
80     if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
81         return ret;
82     if (edges_needed) {
83         for (i = 0; frame->data[i]; i++) {
84             int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) *
85                             frame->linesize[i] +
86                             (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0));
87             frame->data[i] += offset;
88         }
89         frame->width  = s->avctx->width;
90         frame->height = s->avctx->height;
91     }
92
93     return 0;
94 }
95
96 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
97     int plane_index, level, orientation;
98
99     for(plane_index=0; plane_index<3; plane_index++){
100         for(level=0; level<MAX_DECOMPOSITIONS; level++){
101             for(orientation=level ? 1:0; orientation<4; orientation++){
102                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
103             }
104         }
105     }
106     memset(s->header_state, MID_STATE, sizeof(s->header_state));
107     memset(s->block_state, MID_STATE, sizeof(s->block_state));
108 }
109
110 int ff_snow_alloc_blocks(SnowContext *s){
111     int w= FF_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
112     int h= FF_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
113
114     s->b_width = w;
115     s->b_height= h;
116
117     av_free(s->block);
118     s->block= av_mallocz_array(w * h,  sizeof(BlockNode) << (s->block_max_depth*2));
119     if (!s->block)
120         return AVERROR(ENOMEM);
121
122     return 0;
123 }
124
125 static av_cold void init_qexp(void){
126     int i;
127     double v=128;
128
129     for(i=0; i<QROOT; i++){
130         ff_qexp[i]= lrintf(v);
131         v *= pow(2, 1.0 / QROOT);
132     }
133 }
/**
 * Sub-pel motion compensation of one b_w x b_h block.
 *
 * Up to three filtered planes are built in local buffers (horizontal,
 * vertical, and vertical applied to the horizontal intermediate). The
 * output is then a weighted blend of either two or four of the planes
 * collected in hpel[].
 *
 * @param p      plane whose diag_mc, fast_mc and hcoeff fields select the
 *               filtering; may be NULL, in which case the fixed (20,-5,1)
 *               filter is used
 * @param dst    destination, advanced by stride per output row
 * @param src    source pixels; the callers in this file pass a pointer that
 *               is already moved back by HTAPS_MAX/2-1 columns and
 *               HTAPS_MAX/2-1 rows
 * @param stride row stride used for both src and dst
 * @param b_w    block width
 * @param b_h    block height
 * @param dx     horizontal sub-pel offset, asserted to be < 16
 * @param dy     vertical sub-pel offset, asserted to be < 16
 *
 * NOTE(review): rows of the temporaries are 64 entries apart, so b_w+1
 * presumably must not exceed 64 -- confirm against callers.
 */
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
    /* Weight (out of 8) of the first source plane in the two-plane blend,
     * indexed by (dx&7) + 8*(dy&7). */
    static const uint8_t weight[64]={
    8,7,6,5,4,3,2,1,
    7,7,0,0,0,0,0,1,
    6,0,6,0,0,0,2,0,
    5,0,0,5,0,3,0,0,
    4,0,0,0,4,0,0,0,
    3,0,0,5,0,3,0,0,
    2,0,6,0,0,0,2,0,
    1,7,0,0,0,0,0,1,
    };

    /* Indexed by dx + 16*dy. The high nibble (l) and low nibble (r) are
     * indices into hpel[] / needs[] naming the two planes to blend.
     * 0xcc gives l = r = 12, and needs[12] == 15 forces the four-plane path. */
    static const uint8_t brane[256]={
    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
    };

    /* Per hpel[] index: bitmask of the filter passes that must run to produce
     * that plane (tested below as b&5, b&2, b&4). Entries not listed are 0,
     * which MC_STRIDE() also uses to tell "lives in src" from "lives in a
     * 64-wide temporary". */
    static const uint8_t needs[16]={
    0,1,0,0,
    2,4,2,0,
    0,1,0,0,
    15
    };

    int x, y, b, r, l;
    int16_t tmpIt   [64*(32+HTAPS_MAX)];    /* unrounded horizontal filter output */
    uint8_t tmp2t[3][64*(32+HTAPS_MAX)];    /* [0] horizontal, [1] vertical, [2] both */
    int16_t *tmpI= tmpIt;
    uint8_t *tmp2= tmp2t[0];
    const uint8_t *hpel[11];
    av_assert2(dx<16 && dy<16);
    r= brane[dx + 16*dy]&15;
    l= brane[dx + 16*dy]>>4;

    b= needs[l] | needs[r];
    /* Without diag_mc on the plane, always run every pass and use the
     * four-plane blend. */
    if(p && !p->diag_mc)
        b= 15;

    /* Pass 1: horizontal filter over b_h+HTAPS_MAX-1 rows. The raw sum goes to
     * tmpI (input of pass 3), the rounded and clamped pixel to tmp2t[0]. */
    if(b&5){
        for(y=0; y < b_h+HTAPS_MAX-1; y++){
            for(x=0; x < b_w; x++){
                int a_1=src[x + HTAPS_MAX/2-4];
                int a0= src[x + HTAPS_MAX/2-3];
                int a1= src[x + HTAPS_MAX/2-2];
                int a2= src[x + HTAPS_MAX/2-1];
                int a3= src[x + HTAPS_MAX/2+0];
                int a4= src[x + HTAPS_MAX/2+1];
                int a5= src[x + HTAPS_MAX/2+2];
                int a6= src[x + HTAPS_MAX/2+3];
                int am=0;
                if(!p || p->fast_mc){
                    /* fixed 6-tap filter, sum scaled by 32 */
                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
                    tmpI[x]= am;
                    am= (am+16)>>5;
                }else{
                    /* symmetric 8-tap filter from the plane's hcoeff[], sum scaled by 64 */
                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
                    tmpI[x]= am;
                    am= (am+32)>>6;
                }

                /* clamp: negative -> 0, above 255 -> 255 after the 8-bit store */
                if(am&(~255)) am= ~(am>>31);
                tmp2[x]= am;
            }
            tmpI+= 64;
            tmp2+= 64;
            src += stride;
        }
        src -= stride*y;    /* rewind to the first row */
    }
    src += HTAPS_MAX/2 - 1;
    tmp2= tmp2t[1];

    /* Pass 2: vertical filter applied directly to src, for b_w+1 columns
     * (hpel[6] reads this plane shifted right by one). */
    if(b&2){
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w+1; x++){
                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
                int a0= src[x + (HTAPS_MAX/2-3)*stride];
                int a1= src[x + (HTAPS_MAX/2-2)*stride];
                int a2= src[x + (HTAPS_MAX/2-1)*stride];
                int a3= src[x + (HTAPS_MAX/2+0)*stride];
                int a4= src[x + (HTAPS_MAX/2+1)*stride];
                int a5= src[x + (HTAPS_MAX/2+2)*stride];
                int a6= src[x + (HTAPS_MAX/2+3)*stride];
                int am=0;
                if(!p || p->fast_mc)
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
                else
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;

                if(am&(~255)) am= ~(am>>31);
                tmp2[x]= am;
            }
            src += stride;
            tmp2+= 64;
        }
        src -= stride*y;    /* rewind to the first row */
    }
    src += stride*(HTAPS_MAX/2 - 1);
    tmp2= tmp2t[2];
    tmpI= tmpIt;
    /* Pass 3: vertical filter over the unrounded horizontal output; the
     * shifts (10 / 12) remove the scale of both passes at once. */
    if(b&4){
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w; x++){
                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
                int am=0;
                if(!p || p->fast_mc)
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
                else
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
                if(am&(~255)) am= ~(am>>31);
                tmp2[x]= am;
            }
            tmpI+= 64;
            tmp2+= 64;
        }
    }

    /* Candidate planes, laid out as a grid with 4 entries per row; entries
     * 3 and 7 are never set and never read. */
    hpel[ 0]= src;
    hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
    hpel[ 2]= src + 1;

    hpel[ 4]= tmp2t[1];
    hpel[ 5]= tmp2t[2];
    hpel[ 6]= tmp2t[1] + 1;

    hpel[ 8]= src + stride;
    hpel[ 9]= hpel[1] + 64;
    hpel[10]= hpel[8] + 1;

/* Row stride of hpel[x]: 64 for planes held in the temporaries, otherwise the source stride. */
#define MC_STRIDE(x) (needs[x] ? 64 : stride)

    if(b==15){
        /* Four-plane blend: pick the 2x2 group of planes around the position
         * and weight them by the remaining 3 bits of dx and dy. */
        int dxy = dx / 8 + dy / 8 * 4;
        const uint8_t *src1 = hpel[dxy    ];
        const uint8_t *src2 = hpel[dxy + 1];
        const uint8_t *src3 = hpel[dxy + 4];
        const uint8_t *src4 = hpel[dxy + 5];
        int stride1 = MC_STRIDE(dxy);
        int stride2 = MC_STRIDE(dxy + 1);
        int stride3 = MC_STRIDE(dxy + 4);
        int stride4 = MC_STRIDE(dxy + 5);
        dx&=7;
        dy&=7;
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w; x++){
                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
            }
            src1+=stride1;
            src2+=stride2;
            src3+=stride3;
            src4+=stride4;
            dst +=stride;
        }
    }else{
        /* Two-plane blend of hpel[l] and hpel[r] with weights a and 8-a.
         * Note: this inner 'b' shadows the pass bitmask declared above. */
        const uint8_t *src1= hpel[l];
        const uint8_t *src2= hpel[r];
        int stride1 = MC_STRIDE(l);
        int stride2 = MC_STRIDE(r);
        int a= weight[((dx&7) + (8*(dy&7)))];
        int b= 8-a;
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w; x++){
                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
            }
            src1+=stride1;
            src2+=stride2;
            dst +=stride;
        }
    }
}
326
/**
 * Produce the prediction of one block of one plane into dst.
 *
 * Intra blocks (block->type has BLOCK_INTRA set) are filled with the flat
 * value block->color[plane_index]. All other blocks are motion-compensated
 * from s->last_picture[block->ref], either through mc_block() or through the
 * h264qpel function tables.
 *
 * @param dst         destination of the b_w x b_h prediction
 * @param tmp         scratch memory; tmp + MB_SIZE receives an edge-emulated
 *                    copy of the reference area when the bounds check fails
 * @param stride      row stride used for dst, the reference plane and tmp
 * @param sx,sy       block position before the motion vector is applied
 * @param b_w,b_h     block size
 * @param block       block descriptor (type, color, ref, mx, my are read)
 * @param plane_index plane to predict
 * @param w,h         plane size, used for the bounds check and passed to
 *                    emulated_edge_mc()
 */
void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
    if(block->type & BLOCK_INTRA){
        int x, y;
        const unsigned color  = block->color[plane_index];
        /* the colour value replicated into each byte of a 32-bit word */
        const unsigned color4 = color*0x01010101;
        /* Widths 32/16/8/4 are filled with 32-bit stores, any other width
         * byte by byte.
         * NOTE(review): the 32-bit stores presumably rely on dst being
         * suitably aligned -- confirm. */
        if(b_w==32){
            for(y=0; y < b_h; y++){
                *(uint32_t*)&dst[0 + y*stride]= color4;
                *(uint32_t*)&dst[4 + y*stride]= color4;
                *(uint32_t*)&dst[8 + y*stride]= color4;
                *(uint32_t*)&dst[12+ y*stride]= color4;
                *(uint32_t*)&dst[16+ y*stride]= color4;
                *(uint32_t*)&dst[20+ y*stride]= color4;
                *(uint32_t*)&dst[24+ y*stride]= color4;
                *(uint32_t*)&dst[28+ y*stride]= color4;
            }
        }else if(b_w==16){
            for(y=0; y < b_h; y++){
                *(uint32_t*)&dst[0 + y*stride]= color4;
                *(uint32_t*)&dst[4 + y*stride]= color4;
                *(uint32_t*)&dst[8 + y*stride]= color4;
                *(uint32_t*)&dst[12+ y*stride]= color4;
            }
        }else if(b_w==8){
            for(y=0; y < b_h; y++){
                *(uint32_t*)&dst[0 + y*stride]= color4;
                *(uint32_t*)&dst[4 + y*stride]= color4;
            }
        }else if(b_w==4){
            for(y=0; y < b_h; y++){
                *(uint32_t*)&dst[0 + y*stride]= color4;
            }
        }else{
            for(y=0; y < b_h; y++){
                for(x=0; x < b_w; x++){
                    dst[x + y*stride]= color;
                }
            }
        }
    }else{
        uint8_t *src= s->last_picture[block->ref]->data[plane_index];
        /* motion vector scale; reduced by chroma_h_shift for planes other than 0 */
        const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
        int mx= block->mx*scale;
        int my= block->my*scale;
        /* low 4 bits: sub-pel part; the rest (mx>>4, my>>4): whole-pixel part */
        const int dx= mx&15;
        const int dy= my&15;
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
        /* Move to the top-left corner of the filter support: whole-pixel
         * displacement minus HTAPS_MAX/2-1 in each direction. */
        sx += (mx>>4) - (HTAPS_MAX/2-1);
        sy += (my>>4) - (HTAPS_MAX/2-1);
        src += sx + sy*stride;
        /* The unsigned casts make negative positions fail the test as well.
         * When the check fails, the area is fetched through emulated_edge_mc()
         * into tmp + MB_SIZE and read from there. */
        if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
           || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
            s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
                                     stride, stride,
                                     b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
                                     sx, sy, w, h);
            src= tmp + MB_SIZE;
        }

        av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale

        av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
        /* mc_block() handles every case the h264qpel tables are not used for:
         * offsets that are not a multiple of 4, block shapes other than 1:1,
         * 1:2 or 2:1, widths that are not a power of two, 1-pixel blocks, and
         * planes without fast_mc. */
        if(    (dx&3) || (dy&3)
            || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
            || (b_w&(b_w-1))
            || b_w == 1
            || b_h == 1
            || !s->plane[plane_index].fast_mc )
            mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
        else if(b_w==32){
            /* 32-wide blocks: two table-0 calls per 16 rows, the second one
             * 16 columns to the right. src + 3 (+3 rows) skips the
             * filter-support margin added above. */
            int y;
            for(y=0; y<b_h; y+=16){
                s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
                s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
            }
        }else if(b_w==b_h)
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
        else if(b_w==2*b_h){
            /* wide block: two calls side by side, b_h columns apart */
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
        }else{
            /* tall block: two calls stacked, b_w rows apart */
            av_assert2(2*b_w==b_h);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
        }
    }
}
414
/*
 * mca(dx, dy, b_w) defines a static wrapper mc_block_hpel<dx><dy><b_w>()
 * that calls mc_block() with no plane (NULL), a square b_w x b_w block and
 * the fixed sub-pel offset (dx, dy). The source pointer is moved back by
 * HTAPS_MAX/2-1 columns and rows before the call. The 'h' argument is only
 * checked against b_w by an assertion.
 */
#define mca(dx,dy,b_w)\
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
    av_assert2(h==b_w);\
    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
}

/* Wrappers for offsets 0 and 8 in each direction, for 16x16 and 8x8 blocks. */
mca( 0, 0,16)
mca( 8, 0,16)
mca( 0, 8,16)
mca( 8, 8,16)
mca( 0, 0,8)
mca( 8, 0,8)
mca( 0, 8,8)
mca( 8, 8,8)
429
430 av_cold int ff_snow_common_init(AVCodecContext *avctx){
431     SnowContext *s = avctx->priv_data;
432     int width, height;
433     int i, j;
434
435     s->avctx= avctx;
436     s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
437
438     ff_me_cmp_init(&s->mecc, avctx);
439     ff_hpeldsp_init(&s->hdsp, avctx->flags);
440     ff_videodsp_init(&s->vdsp, 8);
441     ff_dwt_init(&s->dwt);
442     ff_h264qpel_init(&s->h264qpel, 8);
443
444 #define mcf(dx,dy)\
445     s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
446     s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
447         s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
448     s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
449     s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
450         s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
451
452     mcf( 0, 0)
453     mcf( 4, 0)
454     mcf( 8, 0)
455     mcf(12, 0)
456     mcf( 0, 4)
457     mcf( 4, 4)
458     mcf( 8, 4)
459     mcf(12, 4)
460     mcf( 0, 8)
461     mcf( 4, 8)
462     mcf( 8, 8)
463     mcf(12, 8)
464     mcf( 0,12)
465     mcf( 4,12)
466     mcf( 8,12)
467     mcf(12,12)
468
469 #define mcfh(dx,dy)\
470     s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
471     s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
472         mc_block_hpel ## dx ## dy ## 16;\
473     s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
474     s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
475         mc_block_hpel ## dx ## dy ## 8;
476
477     mcfh(0, 0)
478     mcfh(8, 0)
479     mcfh(0, 8)
480     mcfh(8, 8)
481
482     init_qexp();
483
484 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
485
486     width= s->avctx->width;
487     height= s->avctx->height;
488
489     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_idwt_buffer, width, height * sizeof(IDWTELEM), fail);
490     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_dwt_buffer,  width, height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
491     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_dwt_buffer,     width, sizeof(DWTELEM),  fail);
492     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_idwt_buffer,    width, sizeof(IDWTELEM), fail);
493     FF_ALLOC_ARRAY_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1), ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
494
495     for(i=0; i<MAX_REF_FRAMES; i++) {
496         for(j=0; j<MAX_REF_FRAMES; j++)
497             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
498         s->last_picture[i] = av_frame_alloc();
499         if (!s->last_picture[i])
500             goto fail;
501     }
502
503     s->mconly_picture = av_frame_alloc();
504     s->current_picture = av_frame_alloc();
505     if (!s->mconly_picture || !s->current_picture)
506         goto fail;
507
508     return 0;
509 fail:
510     return AVERROR(ENOMEM);
511 }
512
513 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
514     SnowContext *s = avctx->priv_data;
515     int plane_index, level, orientation;
516     int ret, emu_buf_size;
517
518     if(!s->scratchbuf) {
519         if ((ret = ff_get_buffer(s->avctx, s->mconly_picture,
520                                  AV_GET_BUFFER_FLAG_REF)) < 0)
521             return ret;
522         FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256), 7*MB_SIZE, fail);
523         emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
524         FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
525     }
526
527     if(s->mconly_picture->format != avctx->pix_fmt) {
528         av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
529         return AVERROR_INVALIDDATA;
530     }
531
532     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
533         int w= s->avctx->width;
534         int h= s->avctx->height;
535
536         if(plane_index){
537             w>>= s->chroma_h_shift;
538             h>>= s->chroma_v_shift;
539         }
540         s->plane[plane_index].width = w;
541         s->plane[plane_index].height= h;
542
543         for(level=s->spatial_decomposition_count-1; level>=0; level--){
544             for(orientation=level ? 1 : 0; orientation<4; orientation++){
545                 SubBand *b= &s->plane[plane_index].band[level][orientation];
546
547                 b->buf= s->spatial_dwt_buffer;
548                 b->level= level;
549                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
550                 b->width = (w + !(orientation&1))>>1;
551                 b->height= (h + !(orientation>1))>>1;
552
553                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
554                 b->buf_x_offset = 0;
555                 b->buf_y_offset = 0;
556
557                 if(orientation&1){
558                     b->buf += (w+1)>>1;
559                     b->buf_x_offset = (w+1)>>1;
560                 }
561                 if(orientation>1){
562                     b->buf += b->stride>>1;
563                     b->buf_y_offset = b->stride_line >> 1;
564                 }
565                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
566
567                 if(level)
568                     b->parent= &s->plane[plane_index].band[level-1][orientation];
569                 //FIXME avoid this realloc
570                 av_freep(&b->x_coeff);
571                 b->x_coeff=av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff));
572                 if (!b->x_coeff)
573                     goto fail;
574             }
575             w= (w+1)>>1;
576             h= (h+1)>>1;
577         }
578     }
579
580     return 0;
581 fail:
582     return AVERROR(ENOMEM);
583 }
584
/* Compile-time switch: when non-zero, ff_snow_frame_start() calls
 * halfpel_interpol() to build half-pel planes for the current picture. */
#define USE_HALFPEL_PLANE 0
586
587 static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
588     int p,x,y;
589
590     for(p=0; p < s->nb_planes; p++){
591         int is_chroma= !!p;
592         int w= is_chroma ? s->avctx->width >>s->chroma_h_shift : s->avctx->width;
593         int h= is_chroma ? s->avctx->height>>s->chroma_v_shift : s->avctx->height;
594         int ls= frame->linesize[p];
595         uint8_t *src= frame->data[p];
596
597         halfpel[1][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
598         halfpel[2][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
599         halfpel[3][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
600         if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) {
601             av_freep(&halfpel[1][p]);
602             av_freep(&halfpel[2][p]);
603             av_freep(&halfpel[3][p]);
604             return AVERROR(ENOMEM);
605         }
606         halfpel[1][p] += EDGE_WIDTH * (1 + ls);
607         halfpel[2][p] += EDGE_WIDTH * (1 + ls);
608         halfpel[3][p] += EDGE_WIDTH * (1 + ls);
609
610         halfpel[0][p]= src;
611         for(y=0; y<h; y++){
612             for(x=0; x<w; x++){
613                 int i= y*ls + x;
614
615                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
616             }
617         }
618         for(y=0; y<h; y++){
619             for(x=0; x<w; x++){
620                 int i= y*ls + x;
621
622                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
623             }
624         }
625         src= halfpel[1][p];
626         for(y=0; y<h; y++){
627             for(x=0; x<w; x++){
628                 int i= y*ls + x;
629
630                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
631             }
632         }
633
634 //FIXME border!
635     }
636     return 0;
637 }
638
639 void ff_snow_release_buffer(AVCodecContext *avctx)
640 {
641     SnowContext *s = avctx->priv_data;
642     int i;
643
644     if(s->last_picture[s->max_ref_frames-1]->data[0]){
645         av_frame_unref(s->last_picture[s->max_ref_frames-1]);
646         for(i=0; i<9; i++)
647             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) {
648                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3]));
649                 s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] = NULL;
650             }
651     }
652 }
653
654 int ff_snow_frame_start(SnowContext *s){
655    AVFrame *tmp;
656    int i, ret;
657
658     ff_snow_release_buffer(s->avctx);
659
660     tmp= s->last_picture[s->max_ref_frames-1];
661     for(i=s->max_ref_frames-1; i>0; i--)
662         s->last_picture[i] = s->last_picture[i-1];
663     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
664     if(USE_HALFPEL_PLANE && s->current_picture->data[0]) {
665         if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0)
666             return ret;
667     }
668     s->last_picture[0] = s->current_picture;
669     s->current_picture = tmp;
670
671     if(s->keyframe){
672         s->ref_frames= 0;
673     }else{
674         int i;
675         for(i=0; i<s->max_ref_frames && s->last_picture[i]->data[0]; i++)
676             if(i && s->last_picture[i-1]->key_frame)
677                 break;
678         s->ref_frames= i;
679         if(s->ref_frames==0){
680             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
681             return -1;
682         }
683     }
684     if ((ret = ff_snow_get_buffer(s, s->current_picture)) < 0)
685         return ret;
686
687     s->current_picture->key_frame= s->keyframe;
688
689     return 0;
690 }
691
692 av_cold void ff_snow_common_end(SnowContext *s)
693 {
694     int plane_index, level, orientation, i;
695
696     av_freep(&s->spatial_dwt_buffer);
697     av_freep(&s->temp_dwt_buffer);
698     av_freep(&s->spatial_idwt_buffer);
699     av_freep(&s->temp_idwt_buffer);
700     av_freep(&s->run_buffer);
701
702     s->m.me.temp= NULL;
703     av_freep(&s->m.me.scratchpad);
704     av_freep(&s->m.me.map);
705     av_freep(&s->m.me.score_map);
706     av_freep(&s->m.obmc_scratchpad);
707
708     av_freep(&s->block);
709     av_freep(&s->scratchbuf);
710     av_freep(&s->emu_edge_buffer);
711
712     for(i=0; i<MAX_REF_FRAMES; i++){
713         av_freep(&s->ref_mvs[i]);
714         av_freep(&s->ref_scores[i]);
715         if(s->last_picture[i] && s->last_picture[i]->data[0]) {
716             av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
717         }
718         av_frame_free(&s->last_picture[i]);
719     }
720
721     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
722         for(level=s->spatial_decomposition_count-1; level>=0; level--){
723             for(orientation=level ? 1 : 0; orientation<4; orientation++){
724                 SubBand *b= &s->plane[plane_index].band[level][orientation];
725
726                 av_freep(&b->x_coeff);
727             }
728         }
729     }
730     av_frame_free(&s->mconly_picture);
731     av_frame_free(&s->current_picture);
732 }