]> git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
x86/jpeg2000dsp: add ff_ict_float_{fma3,fma4}
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "me_cmp.h"
26 #include "snow_dwt.h"
27 #include "internal.h"
28 #include "snow.h"
29 #include "snowdata.h"
30
31 #include "rangecoder.h"
32 #include "mathops.h"
33 #include "h263.h"
34
35
36 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
37                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
38     int y, x;
39     IDWTELEM * dst;
40     for(y=0; y<b_h; y++){
41         //FIXME ugly misuse of obmc_stride
42         const uint8_t *obmc1= obmc + y*obmc_stride;
43         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
44         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
45         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
46         dst = slice_buffer_get_line(sb, src_y + y);
47         for(x=0; x<b_w; x++){
48             int v=   obmc1[x] * block[3][x + y*src_stride]
49                     +obmc2[x] * block[2][x + y*src_stride]
50                     +obmc3[x] * block[1][x + y*src_stride]
51                     +obmc4[x] * block[0][x + y*src_stride];
52
53             v <<= 8 - LOG2_OBMC_MAX;
54             if(FRAC_BITS != 8){
55                 v >>= 8 - FRAC_BITS;
56             }
57             if(add){
58                 v += dst[x + src_x];
59                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
60                 if(v&(~255)) v= ~(v>>31);
61                 dst8[x + y*src_stride] = v;
62             }else{
63                 dst[x + src_x] -= v;
64             }
65         }
66     }
67 }
68
69 int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
70 {
71     int ret, i;
72     int edges_needed = av_codec_is_encoder(s->avctx->codec);
73
74     frame->width  = s->avctx->width ;
75     frame->height = s->avctx->height;
76     if (edges_needed) {
77         frame->width  += 2 * EDGE_WIDTH;
78         frame->height += 2 * EDGE_WIDTH;
79     }
80     if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
81         return ret;
82     if (edges_needed) {
83         for (i = 0; frame->data[i]; i++) {
84             int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) *
85                             frame->linesize[i] +
86                             (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0));
87             frame->data[i] += offset;
88         }
89         frame->width  = s->avctx->width;
90         frame->height = s->avctx->height;
91     }
92
93     return 0;
94 }
95
96 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
97     int plane_index, level, orientation;
98
99     for(plane_index=0; plane_index<3; plane_index++){
100         for(level=0; level<MAX_DECOMPOSITIONS; level++){
101             for(orientation=level ? 1:0; orientation<4; orientation++){
102                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
103             }
104         }
105     }
106     memset(s->header_state, MID_STATE, sizeof(s->header_state));
107     memset(s->block_state, MID_STATE, sizeof(s->block_state));
108 }
109
110 int ff_snow_alloc_blocks(SnowContext *s){
111     int w= AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
112     int h= AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
113
114     s->b_width = w;
115     s->b_height= h;
116
117     av_free(s->block);
118     s->block= av_mallocz_array(w * h,  sizeof(BlockNode) << (s->block_max_depth*2));
119     if (!s->block)
120         return AVERROR(ENOMEM);
121
122     return 0;
123 }
124
125 static av_cold void init_qexp(void){
126     int i;
127     double v=128;
128
129     for(i=0; i<QROOT; i++){
130         ff_qexp[i]= lrintf(v);
131         v *= pow(2, 1.0 / QROOT);
132     }
133 }
134 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
135     static const uint8_t weight[64]={
136     8,7,6,5,4,3,2,1,
137     7,7,0,0,0,0,0,1,
138     6,0,6,0,0,0,2,0,
139     5,0,0,5,0,3,0,0,
140     4,0,0,0,4,0,0,0,
141     3,0,0,5,0,3,0,0,
142     2,0,6,0,0,0,2,0,
143     1,7,0,0,0,0,0,1,
144     };
145
146     static const uint8_t brane[256]={
147     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
148     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
149     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
150     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
151     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
152     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
153     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
154     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
155     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
156     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
157     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
158     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
159     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
160     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
161     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
162     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
163     };
164
165     static const uint8_t needs[16]={
166     0,1,0,0,
167     2,4,2,0,
168     0,1,0,0,
169     15
170     };
171
172     int x, y, b, r, l;
173     int16_t tmpIt   [64*(32+HTAPS_MAX)];
174     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
175     int16_t *tmpI= tmpIt;
176     uint8_t *tmp2= tmp2t[0];
177     const uint8_t *hpel[11];
178     av_assert2(dx<16 && dy<16);
179     r= brane[dx + 16*dy]&15;
180     l= brane[dx + 16*dy]>>4;
181
182     b= needs[l] | needs[r];
183     if(p && !p->diag_mc)
184         b= 15;
185
186     if(b&5){
187         for(y=0; y < b_h+HTAPS_MAX-1; y++){
188             for(x=0; x < b_w; x++){
189                 int a_1=src[x + HTAPS_MAX/2-4];
190                 int a0= src[x + HTAPS_MAX/2-3];
191                 int a1= src[x + HTAPS_MAX/2-2];
192                 int a2= src[x + HTAPS_MAX/2-1];
193                 int a3= src[x + HTAPS_MAX/2+0];
194                 int a4= src[x + HTAPS_MAX/2+1];
195                 int a5= src[x + HTAPS_MAX/2+2];
196                 int a6= src[x + HTAPS_MAX/2+3];
197                 int am=0;
198                 if(!p || p->fast_mc){
199                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
200                     tmpI[x]= am;
201                     am= (am+16)>>5;
202                 }else{
203                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
204                     tmpI[x]= am;
205                     am= (am+32)>>6;
206                 }
207
208                 if(am&(~255)) am= ~(am>>31);
209                 tmp2[x]= am;
210             }
211             tmpI+= 64;
212             tmp2+= 64;
213             src += stride;
214         }
215         src -= stride*y;
216     }
217     src += HTAPS_MAX/2 - 1;
218     tmp2= tmp2t[1];
219
220     if(b&2){
221         for(y=0; y < b_h; y++){
222             for(x=0; x < b_w+1; x++){
223                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
224                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
225                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
226                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
227                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
228                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
229                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
230                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
231                 int am=0;
232                 if(!p || p->fast_mc)
233                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
234                 else
235                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
236
237                 if(am&(~255)) am= ~(am>>31);
238                 tmp2[x]= am;
239             }
240             src += stride;
241             tmp2+= 64;
242         }
243         src -= stride*y;
244     }
245     src += stride*(HTAPS_MAX/2 - 1);
246     tmp2= tmp2t[2];
247     tmpI= tmpIt;
248     if(b&4){
249         for(y=0; y < b_h; y++){
250             for(x=0; x < b_w; x++){
251                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
252                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
253                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
254                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
255                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
256                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
257                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
258                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
259                 int am=0;
260                 if(!p || p->fast_mc)
261                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
262                 else
263                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
264                 if(am&(~255)) am= ~(am>>31);
265                 tmp2[x]= am;
266             }
267             tmpI+= 64;
268             tmp2+= 64;
269         }
270     }
271
272     hpel[ 0]= src;
273     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
274     hpel[ 2]= src + 1;
275
276     hpel[ 4]= tmp2t[1];
277     hpel[ 5]= tmp2t[2];
278     hpel[ 6]= tmp2t[1] + 1;
279
280     hpel[ 8]= src + stride;
281     hpel[ 9]= hpel[1] + 64;
282     hpel[10]= hpel[8] + 1;
283
284 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
285
286     if(b==15){
287         int dxy = dx / 8 + dy / 8 * 4;
288         const uint8_t *src1 = hpel[dxy    ];
289         const uint8_t *src2 = hpel[dxy + 1];
290         const uint8_t *src3 = hpel[dxy + 4];
291         const uint8_t *src4 = hpel[dxy + 5];
292         int stride1 = MC_STRIDE(dxy);
293         int stride2 = MC_STRIDE(dxy + 1);
294         int stride3 = MC_STRIDE(dxy + 4);
295         int stride4 = MC_STRIDE(dxy + 5);
296         dx&=7;
297         dy&=7;
298         for(y=0; y < b_h; y++){
299             for(x=0; x < b_w; x++){
300                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
301                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
302             }
303             src1+=stride1;
304             src2+=stride2;
305             src3+=stride3;
306             src4+=stride4;
307             dst +=stride;
308         }
309     }else{
310         const uint8_t *src1= hpel[l];
311         const uint8_t *src2= hpel[r];
312         int stride1 = MC_STRIDE(l);
313         int stride2 = MC_STRIDE(r);
314         int a= weight[((dx&7) + (8*(dy&7)))];
315         int b= 8-a;
316         for(y=0; y < b_h; y++){
317             for(x=0; x < b_w; x++){
318                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
319             }
320             src1+=stride1;
321             src2+=stride2;
322             dst +=stride;
323         }
324     }
325 }
326
327 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
328     if(block->type & BLOCK_INTRA){
329         int x, y;
330         const unsigned color  = block->color[plane_index];
331         const unsigned color4 = color*0x01010101;
332         if(b_w==32){
333             for(y=0; y < b_h; y++){
334                 *(uint32_t*)&dst[0 + y*stride]= color4;
335                 *(uint32_t*)&dst[4 + y*stride]= color4;
336                 *(uint32_t*)&dst[8 + y*stride]= color4;
337                 *(uint32_t*)&dst[12+ y*stride]= color4;
338                 *(uint32_t*)&dst[16+ y*stride]= color4;
339                 *(uint32_t*)&dst[20+ y*stride]= color4;
340                 *(uint32_t*)&dst[24+ y*stride]= color4;
341                 *(uint32_t*)&dst[28+ y*stride]= color4;
342             }
343         }else if(b_w==16){
344             for(y=0; y < b_h; y++){
345                 *(uint32_t*)&dst[0 + y*stride]= color4;
346                 *(uint32_t*)&dst[4 + y*stride]= color4;
347                 *(uint32_t*)&dst[8 + y*stride]= color4;
348                 *(uint32_t*)&dst[12+ y*stride]= color4;
349             }
350         }else if(b_w==8){
351             for(y=0; y < b_h; y++){
352                 *(uint32_t*)&dst[0 + y*stride]= color4;
353                 *(uint32_t*)&dst[4 + y*stride]= color4;
354             }
355         }else if(b_w==4){
356             for(y=0; y < b_h; y++){
357                 *(uint32_t*)&dst[0 + y*stride]= color4;
358             }
359         }else{
360             for(y=0; y < b_h; y++){
361                 for(x=0; x < b_w; x++){
362                     dst[x + y*stride]= color;
363                 }
364             }
365         }
366     }else{
367         uint8_t *src= s->last_picture[block->ref]->data[plane_index];
368         const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
369         int mx= block->mx*scale;
370         int my= block->my*scale;
371         const int dx= mx&15;
372         const int dy= my&15;
373         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
374         sx += (mx>>4) - (HTAPS_MAX/2-1);
375         sy += (my>>4) - (HTAPS_MAX/2-1);
376         src += sx + sy*stride;
377         if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
378            || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
379             s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
380                                      stride, stride,
381                                      b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
382                                      sx, sy, w, h);
383             src= tmp + MB_SIZE;
384         }
385
386         av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
387
388         av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
389         if(    (dx&3) || (dy&3)
390             || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
391             || (b_w&(b_w-1))
392             || b_w == 1
393             || b_h == 1
394             || !s->plane[plane_index].fast_mc )
395             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
396         else if(b_w==32){
397             int y;
398             for(y=0; y<b_h; y+=16){
399                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
400                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
401             }
402         }else if(b_w==b_h)
403             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
404         else if(b_w==2*b_h){
405             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
406             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
407         }else{
408             av_assert2(2*b_w==b_h);
409             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
410             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
411         }
412     }
413 }
414
415 #define mca(dx,dy,b_w)\
416 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
417     av_assert2(h==b_w);\
418     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
419 }
420
421 mca( 0, 0,16)
422 mca( 8, 0,16)
423 mca( 0, 8,16)
424 mca( 8, 8,16)
425 mca( 0, 0,8)
426 mca( 8, 0,8)
427 mca( 0, 8,8)
428 mca( 8, 8,8)
429
430 av_cold int ff_snow_common_init(AVCodecContext *avctx){
431     SnowContext *s = avctx->priv_data;
432     int width, height;
433     int i, j;
434
435     s->avctx= avctx;
436     s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
437     s->spatial_decomposition_count = 1;
438
439     ff_me_cmp_init(&s->mecc, avctx);
440     ff_hpeldsp_init(&s->hdsp, avctx->flags);
441     ff_videodsp_init(&s->vdsp, 8);
442     ff_dwt_init(&s->dwt);
443     ff_h264qpel_init(&s->h264qpel, 8);
444
445 #define mcf(dx,dy)\
446     s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
447     s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
448         s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
449     s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
450     s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
451         s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
452
453     mcf( 0, 0)
454     mcf( 4, 0)
455     mcf( 8, 0)
456     mcf(12, 0)
457     mcf( 0, 4)
458     mcf( 4, 4)
459     mcf( 8, 4)
460     mcf(12, 4)
461     mcf( 0, 8)
462     mcf( 4, 8)
463     mcf( 8, 8)
464     mcf(12, 8)
465     mcf( 0,12)
466     mcf( 4,12)
467     mcf( 8,12)
468     mcf(12,12)
469
470 #define mcfh(dx,dy)\
471     s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
472     s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
473         mc_block_hpel ## dx ## dy ## 16;\
474     s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
475     s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
476         mc_block_hpel ## dx ## dy ## 8;
477
478     mcfh(0, 0)
479     mcfh(8, 0)
480     mcfh(0, 8)
481     mcfh(8, 8)
482
483     init_qexp();
484
485 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
486
487     width= s->avctx->width;
488     height= s->avctx->height;
489
490     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_idwt_buffer, width, height * sizeof(IDWTELEM), fail);
491     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_dwt_buffer,  width, height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
492     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_dwt_buffer,     width, sizeof(DWTELEM),  fail);
493     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_idwt_buffer,    width, sizeof(IDWTELEM), fail);
494     FF_ALLOC_ARRAY_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1), ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
495
496     for(i=0; i<MAX_REF_FRAMES; i++) {
497         for(j=0; j<MAX_REF_FRAMES; j++)
498             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
499         s->last_picture[i] = av_frame_alloc();
500         if (!s->last_picture[i])
501             goto fail;
502     }
503
504     s->mconly_picture = av_frame_alloc();
505     s->current_picture = av_frame_alloc();
506     if (!s->mconly_picture || !s->current_picture)
507         goto fail;
508
509     return 0;
510 fail:
511     return AVERROR(ENOMEM);
512 }
513
514 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
515     SnowContext *s = avctx->priv_data;
516     int plane_index, level, orientation;
517     int ret, emu_buf_size;
518
519     if(!s->scratchbuf) {
520         if ((ret = ff_get_buffer(s->avctx, s->mconly_picture,
521                                  AV_GET_BUFFER_FLAG_REF)) < 0)
522             return ret;
523         FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256), 7*MB_SIZE, fail);
524         emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
525         FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
526     }
527
528     if(s->mconly_picture->format != avctx->pix_fmt) {
529         av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
530         return AVERROR_INVALIDDATA;
531     }
532
533     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
534         int w= s->avctx->width;
535         int h= s->avctx->height;
536
537         if(plane_index){
538             w = AV_CEIL_RSHIFT(w, s->chroma_h_shift);
539             h = AV_CEIL_RSHIFT(h, s->chroma_v_shift);
540         }
541         s->plane[plane_index].width = w;
542         s->plane[plane_index].height= h;
543
544         for(level=s->spatial_decomposition_count-1; level>=0; level--){
545             for(orientation=level ? 1 : 0; orientation<4; orientation++){
546                 SubBand *b= &s->plane[plane_index].band[level][orientation];
547
548                 b->buf= s->spatial_dwt_buffer;
549                 b->level= level;
550                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
551                 b->width = (w + !(orientation&1))>>1;
552                 b->height= (h + !(orientation>1))>>1;
553
554                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
555                 b->buf_x_offset = 0;
556                 b->buf_y_offset = 0;
557
558                 if(orientation&1){
559                     b->buf += (w+1)>>1;
560                     b->buf_x_offset = (w+1)>>1;
561                 }
562                 if(orientation>1){
563                     b->buf += b->stride>>1;
564                     b->buf_y_offset = b->stride_line >> 1;
565                 }
566                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
567
568                 if(level)
569                     b->parent= &s->plane[plane_index].band[level-1][orientation];
570                 //FIXME avoid this realloc
571                 av_freep(&b->x_coeff);
572                 b->x_coeff=av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff));
573                 if (!b->x_coeff)
574                     goto fail;
575             }
576             w= (w+1)>>1;
577             h= (h+1)>>1;
578         }
579     }
580
581     return 0;
582 fail:
583     return AVERROR(ENOMEM);
584 }
585
586 #define USE_HALFPEL_PLANE 0
587
588 static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
589     int p,x,y;
590
591     for(p=0; p < s->nb_planes; p++){
592         int is_chroma= !!p;
593         int w= is_chroma ? AV_CEIL_RSHIFT(s->avctx->width,  s->chroma_h_shift) : s->avctx->width;
594         int h= is_chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
595         int ls= frame->linesize[p];
596         uint8_t *src= frame->data[p];
597
598         halfpel[1][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
599         halfpel[2][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
600         halfpel[3][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
601         if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) {
602             av_freep(&halfpel[1][p]);
603             av_freep(&halfpel[2][p]);
604             av_freep(&halfpel[3][p]);
605             return AVERROR(ENOMEM);
606         }
607         halfpel[1][p] += EDGE_WIDTH * (1 + ls);
608         halfpel[2][p] += EDGE_WIDTH * (1 + ls);
609         halfpel[3][p] += EDGE_WIDTH * (1 + ls);
610
611         halfpel[0][p]= src;
612         for(y=0; y<h; y++){
613             for(x=0; x<w; x++){
614                 int i= y*ls + x;
615
616                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
617             }
618         }
619         for(y=0; y<h; y++){
620             for(x=0; x<w; x++){
621                 int i= y*ls + x;
622
623                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
624             }
625         }
626         src= halfpel[1][p];
627         for(y=0; y<h; y++){
628             for(x=0; x<w; x++){
629                 int i= y*ls + x;
630
631                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
632             }
633         }
634
635 //FIXME border!
636     }
637     return 0;
638 }
639
640 void ff_snow_release_buffer(AVCodecContext *avctx)
641 {
642     SnowContext *s = avctx->priv_data;
643     int i;
644
645     if(s->last_picture[s->max_ref_frames-1]->data[0]){
646         av_frame_unref(s->last_picture[s->max_ref_frames-1]);
647         for(i=0; i<9; i++)
648             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) {
649                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3]));
650                 s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] = NULL;
651             }
652     }
653 }
654
655 int ff_snow_frame_start(SnowContext *s){
656    AVFrame *tmp;
657    int i, ret;
658
659     ff_snow_release_buffer(s->avctx);
660
661     tmp= s->last_picture[s->max_ref_frames-1];
662     for(i=s->max_ref_frames-1; i>0; i--)
663         s->last_picture[i] = s->last_picture[i-1];
664     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
665     if(USE_HALFPEL_PLANE && s->current_picture->data[0]) {
666         if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0)
667             return ret;
668     }
669     s->last_picture[0] = s->current_picture;
670     s->current_picture = tmp;
671
672     if(s->keyframe){
673         s->ref_frames= 0;
674     }else{
675         int i;
676         for(i=0; i<s->max_ref_frames && s->last_picture[i]->data[0]; i++)
677             if(i && s->last_picture[i-1]->key_frame)
678                 break;
679         s->ref_frames= i;
680         if(s->ref_frames==0){
681             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
682             return AVERROR_INVALIDDATA;
683         }
684     }
685     if ((ret = ff_snow_get_buffer(s, s->current_picture)) < 0)
686         return ret;
687
688     s->current_picture->key_frame= s->keyframe;
689
690     return 0;
691 }
692
693 av_cold void ff_snow_common_end(SnowContext *s)
694 {
695     int plane_index, level, orientation, i;
696
697     av_freep(&s->spatial_dwt_buffer);
698     av_freep(&s->temp_dwt_buffer);
699     av_freep(&s->spatial_idwt_buffer);
700     av_freep(&s->temp_idwt_buffer);
701     av_freep(&s->run_buffer);
702
703     s->m.me.temp= NULL;
704     av_freep(&s->m.me.scratchpad);
705     av_freep(&s->m.me.map);
706     av_freep(&s->m.me.score_map);
707     av_freep(&s->m.sc.obmc_scratchpad);
708
709     av_freep(&s->block);
710     av_freep(&s->scratchbuf);
711     av_freep(&s->emu_edge_buffer);
712
713     for(i=0; i<MAX_REF_FRAMES; i++){
714         av_freep(&s->ref_mvs[i]);
715         av_freep(&s->ref_scores[i]);
716         if(s->last_picture[i] && s->last_picture[i]->data[0]) {
717             av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
718         }
719         av_frame_free(&s->last_picture[i]);
720     }
721
722     for(plane_index=0; plane_index < MAX_PLANES; plane_index++){
723         for(level=MAX_DECOMPOSITIONS-1; level>=0; level--){
724             for(orientation=level ? 1 : 0; orientation<4; orientation++){
725                 SubBand *b= &s->plane[plane_index].band[level][orientation];
726
727                 av_freep(&b->x_coeff);
728             }
729         }
730     }
731     av_frame_free(&s->mconly_picture);
732     av_frame_free(&s->current_picture);
733 }