]> git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
avformat/avio: Add Metacube support
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "libavutil/thread.h"
25 #include "avcodec.h"
26 #include "me_cmp.h"
27 #include "snow_dwt.h"
28 #include "internal.h"
29 #include "snow.h"
30 #include "snowdata.h"
31
32 #include "rangecoder.h"
33 #include "mathops.h"
34 #include "h263.h"
35
36
37 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
38                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
39     int y, x;
40     IDWTELEM * dst;
41     for(y=0; y<b_h; y++){
42         //FIXME ugly misuse of obmc_stride
43         const uint8_t *obmc1= obmc + y*obmc_stride;
44         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
45         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
46         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
47         dst = slice_buffer_get_line(sb, src_y + y);
48         for(x=0; x<b_w; x++){
49             int v=   obmc1[x] * block[3][x + y*src_stride]
50                     +obmc2[x] * block[2][x + y*src_stride]
51                     +obmc3[x] * block[1][x + y*src_stride]
52                     +obmc4[x] * block[0][x + y*src_stride];
53
54             v <<= 8 - LOG2_OBMC_MAX;
55             if(FRAC_BITS != 8){
56                 v >>= 8 - FRAC_BITS;
57             }
58             if(add){
59                 v += dst[x + src_x];
60                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
61                 if(v&(~255)) v= ~(v>>31);
62                 dst8[x + y*src_stride] = v;
63             }else{
64                 dst[x + src_x] -= v;
65             }
66         }
67     }
68 }
69
70 int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
71 {
72     int ret, i;
73     int edges_needed = av_codec_is_encoder(s->avctx->codec);
74
75     frame->width  = s->avctx->width ;
76     frame->height = s->avctx->height;
77     if (edges_needed) {
78         frame->width  += 2 * EDGE_WIDTH;
79         frame->height += 2 * EDGE_WIDTH;
80     }
81     if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
82         return ret;
83     if (edges_needed) {
84         for (i = 0; frame->data[i]; i++) {
85             int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) *
86                             frame->linesize[i] +
87                             (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0));
88             frame->data[i] += offset;
89         }
90         frame->width  = s->avctx->width;
91         frame->height = s->avctx->height;
92     }
93
94     return 0;
95 }
96
97 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
98     int plane_index, level, orientation;
99
100     for(plane_index=0; plane_index<3; plane_index++){
101         for(level=0; level<MAX_DECOMPOSITIONS; level++){
102             for(orientation=level ? 1:0; orientation<4; orientation++){
103                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
104             }
105         }
106     }
107     memset(s->header_state, MID_STATE, sizeof(s->header_state));
108     memset(s->block_state, MID_STATE, sizeof(s->block_state));
109 }
110
111 int ff_snow_alloc_blocks(SnowContext *s){
112     int w= AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
113     int h= AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
114
115     s->b_width = w;
116     s->b_height= h;
117
118     av_free(s->block);
119     s->block= av_mallocz_array(w * h,  sizeof(BlockNode) << (s->block_max_depth*2));
120     if (!s->block)
121         return AVERROR(ENOMEM);
122
123     return 0;
124 }
125
126 static av_cold void init_qexp(void){
127     int i;
128     double v=128;
129
130     for(i=0; i<QROOT; i++){
131         ff_qexp[i]= lrintf(v);
132         v *= pow(2, 1.0 / QROOT);
133     }
134 }
135 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
136     static const uint8_t weight[64]={
137     8,7,6,5,4,3,2,1,
138     7,7,0,0,0,0,0,1,
139     6,0,6,0,0,0,2,0,
140     5,0,0,5,0,3,0,0,
141     4,0,0,0,4,0,0,0,
142     3,0,0,5,0,3,0,0,
143     2,0,6,0,0,0,2,0,
144     1,7,0,0,0,0,0,1,
145     };
146
147     static const uint8_t brane[256]={
148     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
149     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
150     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
151     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
152     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
153     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
154     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
155     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
156     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
157     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
158     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
159     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
160     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
161     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
162     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
163     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
164     };
165
166     static const uint8_t needs[16]={
167     0,1,0,0,
168     2,4,2,0,
169     0,1,0,0,
170     15
171     };
172
173     int x, y, b, r, l;
174     int16_t tmpIt   [64*(32+HTAPS_MAX)];
175     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
176     int16_t *tmpI= tmpIt;
177     uint8_t *tmp2= tmp2t[0];
178     const uint8_t *hpel[11];
179     av_assert2(dx<16 && dy<16);
180     r= brane[dx + 16*dy]&15;
181     l= brane[dx + 16*dy]>>4;
182
183     b= needs[l] | needs[r];
184     if(p && !p->diag_mc)
185         b= 15;
186
187     if(b&5){
188         for(y=0; y < b_h+HTAPS_MAX-1; y++){
189             for(x=0; x < b_w; x++){
190                 int a_1=src[x + HTAPS_MAX/2-4];
191                 int a0= src[x + HTAPS_MAX/2-3];
192                 int a1= src[x + HTAPS_MAX/2-2];
193                 int a2= src[x + HTAPS_MAX/2-1];
194                 int a3= src[x + HTAPS_MAX/2+0];
195                 int a4= src[x + HTAPS_MAX/2+1];
196                 int a5= src[x + HTAPS_MAX/2+2];
197                 int a6= src[x + HTAPS_MAX/2+3];
198                 int am=0;
199                 if(!p || p->fast_mc){
200                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
201                     tmpI[x]= am;
202                     am= (am+16)>>5;
203                 }else{
204                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
205                     tmpI[x]= am;
206                     am= (am+32)>>6;
207                 }
208
209                 if(am&(~255)) am= ~(am>>31);
210                 tmp2[x]= am;
211             }
212             tmpI+= 64;
213             tmp2+= 64;
214             src += stride;
215         }
216         src -= stride*y;
217     }
218     src += HTAPS_MAX/2 - 1;
219     tmp2= tmp2t[1];
220
221     if(b&2){
222         for(y=0; y < b_h; y++){
223             for(x=0; x < b_w+1; x++){
224                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
225                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
226                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
227                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
228                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
229                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
230                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
231                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
232                 int am=0;
233                 if(!p || p->fast_mc)
234                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
235                 else
236                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
237
238                 if(am&(~255)) am= ~(am>>31);
239                 tmp2[x]= am;
240             }
241             src += stride;
242             tmp2+= 64;
243         }
244         src -= stride*y;
245     }
246     src += stride*(HTAPS_MAX/2 - 1);
247     tmp2= tmp2t[2];
248     tmpI= tmpIt;
249     if(b&4){
250         for(y=0; y < b_h; y++){
251             for(x=0; x < b_w; x++){
252                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
253                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
254                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
255                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
256                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
257                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
258                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
259                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
260                 int am=0;
261                 if(!p || p->fast_mc)
262                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
263                 else
264                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
265                 if(am&(~255)) am= ~(am>>31);
266                 tmp2[x]= am;
267             }
268             tmpI+= 64;
269             tmp2+= 64;
270         }
271     }
272
273     hpel[ 0]= src;
274     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
275     hpel[ 2]= src + 1;
276
277     hpel[ 4]= tmp2t[1];
278     hpel[ 5]= tmp2t[2];
279     hpel[ 6]= tmp2t[1] + 1;
280
281     hpel[ 8]= src + stride;
282     hpel[ 9]= hpel[1] + 64;
283     hpel[10]= hpel[8] + 1;
284
285 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
286
287     if(b==15){
288         int dxy = dx / 8 + dy / 8 * 4;
289         const uint8_t *src1 = hpel[dxy    ];
290         const uint8_t *src2 = hpel[dxy + 1];
291         const uint8_t *src3 = hpel[dxy + 4];
292         const uint8_t *src4 = hpel[dxy + 5];
293         int stride1 = MC_STRIDE(dxy);
294         int stride2 = MC_STRIDE(dxy + 1);
295         int stride3 = MC_STRIDE(dxy + 4);
296         int stride4 = MC_STRIDE(dxy + 5);
297         dx&=7;
298         dy&=7;
299         for(y=0; y < b_h; y++){
300             for(x=0; x < b_w; x++){
301                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
302                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
303             }
304             src1+=stride1;
305             src2+=stride2;
306             src3+=stride3;
307             src4+=stride4;
308             dst +=stride;
309         }
310     }else{
311         const uint8_t *src1= hpel[l];
312         const uint8_t *src2= hpel[r];
313         int stride1 = MC_STRIDE(l);
314         int stride2 = MC_STRIDE(r);
315         int a= weight[((dx&7) + (8*(dy&7)))];
316         int b= 8-a;
317         for(y=0; y < b_h; y++){
318             for(x=0; x < b_w; x++){
319                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
320             }
321             src1+=stride1;
322             src2+=stride2;
323             dst +=stride;
324         }
325     }
326 }
327
328 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
329     if(block->type & BLOCK_INTRA){
330         int x, y;
331         const unsigned color  = block->color[plane_index];
332         const unsigned color4 = color*0x01010101;
333         if(b_w==32){
334             for(y=0; y < b_h; y++){
335                 *(uint32_t*)&dst[0 + y*stride]= color4;
336                 *(uint32_t*)&dst[4 + y*stride]= color4;
337                 *(uint32_t*)&dst[8 + y*stride]= color4;
338                 *(uint32_t*)&dst[12+ y*stride]= color4;
339                 *(uint32_t*)&dst[16+ y*stride]= color4;
340                 *(uint32_t*)&dst[20+ y*stride]= color4;
341                 *(uint32_t*)&dst[24+ y*stride]= color4;
342                 *(uint32_t*)&dst[28+ y*stride]= color4;
343             }
344         }else if(b_w==16){
345             for(y=0; y < b_h; y++){
346                 *(uint32_t*)&dst[0 + y*stride]= color4;
347                 *(uint32_t*)&dst[4 + y*stride]= color4;
348                 *(uint32_t*)&dst[8 + y*stride]= color4;
349                 *(uint32_t*)&dst[12+ y*stride]= color4;
350             }
351         }else if(b_w==8){
352             for(y=0; y < b_h; y++){
353                 *(uint32_t*)&dst[0 + y*stride]= color4;
354                 *(uint32_t*)&dst[4 + y*stride]= color4;
355             }
356         }else if(b_w==4){
357             for(y=0; y < b_h; y++){
358                 *(uint32_t*)&dst[0 + y*stride]= color4;
359             }
360         }else{
361             for(y=0; y < b_h; y++){
362                 for(x=0; x < b_w; x++){
363                     dst[x + y*stride]= color;
364                 }
365             }
366         }
367     }else{
368         uint8_t *src= s->last_picture[block->ref]->data[plane_index];
369         const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
370         int mx= block->mx*scale;
371         int my= block->my*scale;
372         const int dx= mx&15;
373         const int dy= my&15;
374         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
375         sx += (mx>>4) - (HTAPS_MAX/2-1);
376         sy += (my>>4) - (HTAPS_MAX/2-1);
377         src += sx + sy*stride;
378         if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
379            || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
380             s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
381                                      stride, stride,
382                                      b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
383                                      sx, sy, w, h);
384             src= tmp + MB_SIZE;
385         }
386
387         av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
388
389         av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
390         if(    (dx&3) || (dy&3)
391             || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
392             || (b_w&(b_w-1))
393             || b_w == 1
394             || b_h == 1
395             || !s->plane[plane_index].fast_mc )
396             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
397         else if(b_w==32){
398             int y;
399             for(y=0; y<b_h; y+=16){
400                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
401                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
402             }
403         }else if(b_w==b_h)
404             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
405         else if(b_w==2*b_h){
406             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
407             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
408         }else{
409             av_assert2(2*b_w==b_h);
410             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
411             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
412         }
413     }
414 }
415
416 #define mca(dx,dy,b_w)\
417 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
418     av_assert2(h==b_w);\
419     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
420 }
421
422 mca( 0, 0,16)
423 mca( 8, 0,16)
424 mca( 0, 8,16)
425 mca( 8, 8,16)
426 mca( 0, 0,8)
427 mca( 8, 0,8)
428 mca( 0, 8,8)
429 mca( 8, 8,8)
430
431 static av_cold void snow_static_init(void)
432 {
433     for (int i = 0; i < MAX_REF_FRAMES; i++)
434         for (int j = 0; j < MAX_REF_FRAMES; j++)
435             ff_scale_mv_ref[i][j] = 256 * (i + 1) / (j + 1);
436     init_qexp();
437 }
438
439 av_cold int ff_snow_common_init(AVCodecContext *avctx){
440     static AVOnce init_static_once = AV_ONCE_INIT;
441     SnowContext *s = avctx->priv_data;
442     int width, height;
443     int i;
444
445     s->avctx= avctx;
446     s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
447     s->spatial_decomposition_count = 1;
448
449     ff_me_cmp_init(&s->mecc, avctx);
450     ff_hpeldsp_init(&s->hdsp, avctx->flags);
451     ff_videodsp_init(&s->vdsp, 8);
452     ff_dwt_init(&s->dwt);
453     ff_h264qpel_init(&s->h264qpel, 8);
454
455 #define mcf(dx,dy)\
456     s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
457     s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
458         s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
459     s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
460     s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
461         s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
462
463     mcf( 0, 0)
464     mcf( 4, 0)
465     mcf( 8, 0)
466     mcf(12, 0)
467     mcf( 0, 4)
468     mcf( 4, 4)
469     mcf( 8, 4)
470     mcf(12, 4)
471     mcf( 0, 8)
472     mcf( 4, 8)
473     mcf( 8, 8)
474     mcf(12, 8)
475     mcf( 0,12)
476     mcf( 4,12)
477     mcf( 8,12)
478     mcf(12,12)
479
480 #define mcfh(dx,dy)\
481     s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
482     s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
483         mc_block_hpel ## dx ## dy ## 16;\
484     s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
485     s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
486         mc_block_hpel ## dx ## dy ## 8;
487
488     mcfh(0, 0)
489     mcfh(8, 0)
490     mcfh(0, 8)
491     mcfh(8, 8)
492
493 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
494
495     width= s->avctx->width;
496     height= s->avctx->height;
497
498     if (!FF_ALLOCZ_TYPED_ARRAY(s->spatial_idwt_buffer, width * height) ||
499         !FF_ALLOCZ_TYPED_ARRAY(s->spatial_dwt_buffer,  width * height) ||  //FIXME this does not belong here
500         !FF_ALLOCZ_TYPED_ARRAY(s->temp_dwt_buffer,     width)          ||
501         !FF_ALLOCZ_TYPED_ARRAY(s->temp_idwt_buffer,    width)          ||
502         !FF_ALLOCZ_TYPED_ARRAY(s->run_buffer, ((width + 1) >> 1) * ((height + 1) >> 1)))
503         return AVERROR(ENOMEM);
504
505     for(i=0; i<MAX_REF_FRAMES; i++) {
506         s->last_picture[i] = av_frame_alloc();
507         if (!s->last_picture[i])
508             return AVERROR(ENOMEM);
509     }
510
511     s->mconly_picture = av_frame_alloc();
512     s->current_picture = av_frame_alloc();
513     if (!s->mconly_picture || !s->current_picture)
514         return AVERROR(ENOMEM);
515
516     ff_thread_once(&init_static_once, snow_static_init);
517
518     return 0;
519 }
520
521 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
522     SnowContext *s = avctx->priv_data;
523     int plane_index, level, orientation;
524     int ret, emu_buf_size;
525
526     if(!s->scratchbuf) {
527         if ((ret = ff_get_buffer(s->avctx, s->mconly_picture,
528                                  AV_GET_BUFFER_FLAG_REF)) < 0)
529             return ret;
530         emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
531         if (!FF_ALLOCZ_TYPED_ARRAY(s->scratchbuf,      FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * 7 * MB_SIZE) ||
532             !FF_ALLOCZ_TYPED_ARRAY(s->emu_edge_buffer, emu_buf_size))
533             return AVERROR(ENOMEM);
534     }
535
536     if(s->mconly_picture->format != avctx->pix_fmt) {
537         av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
538         return AVERROR_INVALIDDATA;
539     }
540
541     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
542         int w= s->avctx->width;
543         int h= s->avctx->height;
544
545         if(plane_index){
546             w = AV_CEIL_RSHIFT(w, s->chroma_h_shift);
547             h = AV_CEIL_RSHIFT(h, s->chroma_v_shift);
548         }
549         s->plane[plane_index].width = w;
550         s->plane[plane_index].height= h;
551
552         for(level=s->spatial_decomposition_count-1; level>=0; level--){
553             for(orientation=level ? 1 : 0; orientation<4; orientation++){
554                 SubBand *b= &s->plane[plane_index].band[level][orientation];
555
556                 b->buf= s->spatial_dwt_buffer;
557                 b->level= level;
558                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
559                 b->width = (w + !(orientation&1))>>1;
560                 b->height= (h + !(orientation>1))>>1;
561
562                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
563                 b->buf_x_offset = 0;
564                 b->buf_y_offset = 0;
565
566                 if(orientation&1){
567                     b->buf += (w+1)>>1;
568                     b->buf_x_offset = (w+1)>>1;
569                 }
570                 if(orientation>1){
571                     b->buf += b->stride>>1;
572                     b->buf_y_offset = b->stride_line >> 1;
573                 }
574                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
575
576                 if(level)
577                     b->parent= &s->plane[plane_index].band[level-1][orientation];
578                 //FIXME avoid this realloc
579                 av_freep(&b->x_coeff);
580                 b->x_coeff=av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff));
581                 if (!b->x_coeff)
582                     return AVERROR(ENOMEM);
583             }
584             w= (w+1)>>1;
585             h= (h+1)>>1;
586         }
587     }
588
589     return 0;
590 }
591
592 #define USE_HALFPEL_PLANE 0
593
594 static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
595     int p,x,y;
596
597     for(p=0; p < s->nb_planes; p++){
598         int is_chroma= !!p;
599         int w= is_chroma ? AV_CEIL_RSHIFT(s->avctx->width,  s->chroma_h_shift) : s->avctx->width;
600         int h= is_chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
601         int ls= frame->linesize[p];
602         uint8_t *src= frame->data[p];
603
604         halfpel[1][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
605         halfpel[2][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
606         halfpel[3][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
607         if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) {
608             av_freep(&halfpel[1][p]);
609             av_freep(&halfpel[2][p]);
610             av_freep(&halfpel[3][p]);
611             return AVERROR(ENOMEM);
612         }
613         halfpel[1][p] += EDGE_WIDTH * (1 + ls);
614         halfpel[2][p] += EDGE_WIDTH * (1 + ls);
615         halfpel[3][p] += EDGE_WIDTH * (1 + ls);
616
617         halfpel[0][p]= src;
618         for(y=0; y<h; y++){
619             for(x=0; x<w; x++){
620                 int i= y*ls + x;
621
622                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
623             }
624         }
625         for(y=0; y<h; y++){
626             for(x=0; x<w; x++){
627                 int i= y*ls + x;
628
629                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
630             }
631         }
632         src= halfpel[1][p];
633         for(y=0; y<h; y++){
634             for(x=0; x<w; x++){
635                 int i= y*ls + x;
636
637                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
638             }
639         }
640
641 //FIXME border!
642     }
643     return 0;
644 }
645
646 void ff_snow_release_buffer(AVCodecContext *avctx)
647 {
648     SnowContext *s = avctx->priv_data;
649     int i;
650
651     if(s->last_picture[s->max_ref_frames-1]->data[0]){
652         av_frame_unref(s->last_picture[s->max_ref_frames-1]);
653         for(i=0; i<9; i++)
654             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) {
655                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3]));
656                 s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] = NULL;
657             }
658     }
659 }
660
661 int ff_snow_frame_start(SnowContext *s){
662    AVFrame *tmp;
663    int i, ret;
664
665     ff_snow_release_buffer(s->avctx);
666
667     tmp= s->last_picture[s->max_ref_frames-1];
668     for(i=s->max_ref_frames-1; i>0; i--)
669         s->last_picture[i] = s->last_picture[i-1];
670     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
671     if(USE_HALFPEL_PLANE && s->current_picture->data[0]) {
672         if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0)
673             return ret;
674     }
675     s->last_picture[0] = s->current_picture;
676     s->current_picture = tmp;
677
678     if(s->keyframe){
679         s->ref_frames= 0;
680     }else{
681         int i;
682         for(i=0; i<s->max_ref_frames && s->last_picture[i]->data[0]; i++)
683             if(i && s->last_picture[i-1]->key_frame)
684                 break;
685         s->ref_frames= i;
686         if(s->ref_frames==0){
687             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
688             return AVERROR_INVALIDDATA;
689         }
690     }
691     if ((ret = ff_snow_get_buffer(s, s->current_picture)) < 0)
692         return ret;
693
694     s->current_picture->key_frame= s->keyframe;
695
696     return 0;
697 }
698
699 av_cold void ff_snow_common_end(SnowContext *s)
700 {
701     int plane_index, level, orientation, i;
702
703     av_freep(&s->spatial_dwt_buffer);
704     av_freep(&s->temp_dwt_buffer);
705     av_freep(&s->spatial_idwt_buffer);
706     av_freep(&s->temp_idwt_buffer);
707     av_freep(&s->run_buffer);
708
709     s->m.me.temp= NULL;
710     av_freep(&s->m.me.scratchpad);
711     av_freep(&s->m.me.map);
712     av_freep(&s->m.me.score_map);
713     av_freep(&s->m.sc.obmc_scratchpad);
714
715     av_freep(&s->block);
716     av_freep(&s->scratchbuf);
717     av_freep(&s->emu_edge_buffer);
718
719     for(i=0; i<MAX_REF_FRAMES; i++){
720         av_freep(&s->ref_mvs[i]);
721         av_freep(&s->ref_scores[i]);
722         if(s->last_picture[i] && s->last_picture[i]->data[0]) {
723             av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
724         }
725         av_frame_free(&s->last_picture[i]);
726     }
727
728     for(plane_index=0; plane_index < MAX_PLANES; plane_index++){
729         for(level=MAX_DECOMPOSITIONS-1; level>=0; level--){
730             for(orientation=level ? 1 : 0; orientation<4; orientation++){
731                 SubBand *b= &s->plane[plane_index].band[level][orientation];
732
733                 av_freep(&b->x_coeff);
734             }
735         }
736     }
737     av_frame_free(&s->mconly_picture);
738     av_frame_free(&s->current_picture);
739 }