git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
Merge commit '458e7c94830d1522997e33a0b5e87bd709e8a349'
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "dsputil.h"
26 #include "snow_dwt.h"
27 #include "internal.h"
28 #include "snow.h"
29 #include "snowdata.h"
30
31 #include "rangecoder.h"
32 #include "mathops.h"
33 #include "h263.h"
34
35
36 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
37                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
38     int y, x;
39     IDWTELEM * dst;
40     for(y=0; y<b_h; y++){
41         //FIXME ugly misuse of obmc_stride
42         const uint8_t *obmc1= obmc + y*obmc_stride;
43         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
44         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
45         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
46         dst = slice_buffer_get_line(sb, src_y + y);
47         for(x=0; x<b_w; x++){
48             int v=   obmc1[x] * block[3][x + y*src_stride]
49                     +obmc2[x] * block[2][x + y*src_stride]
50                     +obmc3[x] * block[1][x + y*src_stride]
51                     +obmc4[x] * block[0][x + y*src_stride];
52
53             v <<= 8 - LOG2_OBMC_MAX;
54             if(FRAC_BITS != 8){
55                 v >>= 8 - FRAC_BITS;
56             }
57             if(add){
58                 v += dst[x + src_x];
59                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
60                 if(v&(~255)) v= ~(v>>31);
61                 dst8[x + y*src_stride] = v;
62             }else{
63                 dst[x + src_x] -= v;
64             }
65         }
66     }
67 }
68
69 int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
70 {
71     int ret, i;
72
73     frame->width  = s->avctx->width  + 2 * EDGE_WIDTH;
74     frame->height = s->avctx->height + 2 * EDGE_WIDTH;
75     if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
76         return ret;
77     for (i = 0; frame->data[i]; i++) {
78         int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) *
79                         frame->linesize[i] +
80                         (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0));
81         frame->data[i] += offset;
82     }
83     frame->width  = s->avctx->width;
84     frame->height = s->avctx->height;
85
86     return 0;
87 }
88
89 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
90     int plane_index, level, orientation;
91
92     for(plane_index=0; plane_index<3; plane_index++){
93         for(level=0; level<MAX_DECOMPOSITIONS; level++){
94             for(orientation=level ? 1:0; orientation<4; orientation++){
95                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
96             }
97         }
98     }
99     memset(s->header_state, MID_STATE, sizeof(s->header_state));
100     memset(s->block_state, MID_STATE, sizeof(s->block_state));
101 }
102
103 int ff_snow_alloc_blocks(SnowContext *s){
104     int w= FF_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
105     int h= FF_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
106
107     s->b_width = w;
108     s->b_height= h;
109
110     av_free(s->block);
111     s->block= av_mallocz_array(w * h,  sizeof(BlockNode) << (s->block_max_depth*2));
112     if (!s->block)
113         return AVERROR(ENOMEM);
114
115     return 0;
116 }
117
118 static av_cold void init_qexp(void){
119     int i;
120     double v=128;
121
122     for(i=0; i<QROOT; i++){
123         ff_qexp[i]= lrintf(v);
124         v *= pow(2, 1.0 / QROOT);
125     }
126 }
/**
 * Sub-pel motion compensation of one b_w x b_h block.
 *
 * Up to three half-pel planes are interpolated into on-stack temporaries
 * (row stride 64) and dst is produced either as a weighted blend of two
 * planes or as a bilinear blend of four neighbouring planes.
 *
 * @param p      plane parameters (diag_mc, fast_mc, hcoeff); may be NULL, in
 *               which case the fixed (20,-5,1) filter is used
 * @param dst    destination block, advanced by stride per row
 * @param src    source; on entry it points HTAPS_MAX/2-1 pixels to the left
 *               of and above the pixel that corresponds to dst[0]
 * @param stride row stride shared by src and dst
 * @param b_w    block width
 * @param b_h    block height
 * @param dx,dy  sub-pel offsets, each asserted to be below 16
 */
127 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
    /* Weight (out of 8) given to hpel[l] in the two-plane blend, indexed by
     * (dx&7) + 8*(dy&7); hpel[r] receives 8 minus this value. */
128     static const uint8_t weight[64]={
129     8,7,6,5,4,3,2,1,
130     7,7,0,0,0,0,0,1,
131     6,0,6,0,0,0,2,0,
132     5,0,0,5,0,3,0,0,
133     4,0,0,0,4,0,0,0,
134     3,0,0,5,0,3,0,0,
135     2,0,6,0,0,0,2,0,
136     1,7,0,0,0,0,0,1,
137     };
138
    /* Indexed by dx + 16*dy. The high nibble is l and the low nibble is r,
     * both indices into hpel[] and needs[]. 0xcc yields l == r == 12, for
     * which needs[] is 15, i.e. the four-plane bilinear path below. */
139     static const uint8_t brane[256]={
140     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
141     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
142     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
143     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
144     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
145     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
146     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
147     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
148     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
149     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
150     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
151     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
152     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
153     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
154     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
155     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
156     };
157
    /* Per hpel[] index: bit mask of the interpolation passes that must run
     * (1: horizontal, 2: vertical on src, 4: vertical on the horizontal
     * intermediate). A non-zero entry also means the plane lives in a
     * temporary with row stride 64, see MC_STRIDE below. Entries past the
     * 13th are implicitly zero. */
158     static const uint8_t needs[16]={
159     0,1,0,0,
160     2,4,2,0,
161     0,1,0,0,
162     15
163     };
164
165     int x, y, b, r, l;
    /* tmpIt holds the unrounded horizontal filter output (input of the b&4
     * pass); tmp2t[0..2] hold the rounded and clamped planes of the
     * horizontal, vertical and combined passes respectively. */
166     int16_t tmpIt   [64*(32+HTAPS_MAX)];
167     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
168     int16_t *tmpI= tmpIt;
169     uint8_t *tmp2= tmp2t[0];
170     const uint8_t *hpel[11];
171     av_assert2(dx<16 && dy<16);
172     r= brane[dx + 16*dy]&15;
173     l= brane[dx + 16*dy]>>4;
174
175     b= needs[l] | needs[r];
    /* a plane with diag_mc disabled always takes the four-plane path */
176     if(p && !p->diag_mc)
177         b= 15;
178
    /* Horizontal pass over b_h+HTAPS_MAX-1 rows, so that the vertical pass
     * over tmpIt has all the rows it reads. */
179     if(b&5){
180         for(y=0; y < b_h+HTAPS_MAX-1; y++){
181             for(x=0; x < b_w; x++){
182                 int a_1=src[x + HTAPS_MAX/2-4];
183                 int a0= src[x + HTAPS_MAX/2-3];
184                 int a1= src[x + HTAPS_MAX/2-2];
185                 int a2= src[x + HTAPS_MAX/2-1];
186                 int a3= src[x + HTAPS_MAX/2+0];
187                 int a4= src[x + HTAPS_MAX/2+1];
188                 int a5= src[x + HTAPS_MAX/2+2];
189                 int a6= src[x + HTAPS_MAX/2+3];
190                 int am=0;
191                 if(!p || p->fast_mc){
192                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
193                     tmpI[x]= am;
194                     am= (am+16)>>5;
195                 }else{
196                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
197                     tmpI[x]= am;
198                     am= (am+32)>>6;
199                 }
200
                /* clamp to 0..255: negative -> 0, above 255 -> all ones */
201                 if(am&(~255)) am= ~(am>>31);
202                 tmp2[x]= am;
203             }
204             tmpI+= 64;
205             tmp2+= 64;
206             src += stride;
207         }
        /* rewind src by the y rows advanced in the loop */
208         src -= stride*y;
209     }
210     src += HTAPS_MAX/2 - 1;
211     tmp2= tmp2t[1];
212
    /* Vertical pass directly on src; b_w+1 columns because hpel[6] reads
     * this plane shifted right by one. */
213     if(b&2){
214         for(y=0; y < b_h; y++){
215             for(x=0; x < b_w+1; x++){
216                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
217                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
218                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
219                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
220                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
221                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
222                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
223                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
224                 int am=0;
225                 if(!p || p->fast_mc)
226                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
227                 else
228                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
229
230                 if(am&(~255)) am= ~(am>>31);
231                 tmp2[x]= am;
232             }
233             src += stride;
234             tmp2+= 64;
235         }
236         src -= stride*y;
237     }
238     src += stride*(HTAPS_MAX/2 - 1);
239     tmp2= tmp2t[2];
240     tmpI= tmpIt;
    /* Vertical pass on the unrounded horizontal output; the rounding shift
     * covers both filter gains (10 = 5+5, 12 = 6+6). */
241     if(b&4){
242         for(y=0; y < b_h; y++){
243             for(x=0; x < b_w; x++){
244                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
245                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
246                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
247                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
248                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
249                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
250                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
251                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
252                 int am=0;
253                 if(!p || p->fast_mc)
254                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
255                 else
256                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
257                 if(am&(~255)) am= ~(am>>31);
258                 tmp2[x]= am;
259             }
260             tmpI+= 64;
261             tmp2+= 64;
262         }
263     }
264
    /* Plane table laid out as hx + 4*hy with hx,hy in 0..2: even positions
     * are full-pel (src, src+1, src+stride, ...), odd positions are the
     * interpolated planes. Indices 3 and 7 are never assigned. */
265     hpel[ 0]= src;
266     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
267     hpel[ 2]= src + 1;
268
269     hpel[ 4]= tmp2t[1];
270     hpel[ 5]= tmp2t[2];
271     hpel[ 6]= tmp2t[1] + 1;
272
273     hpel[ 8]= src + stride;
274     hpel[ 9]= hpel[1] + 64;
275     hpel[10]= hpel[8] + 1;
276
    /* row stride of hpel[x]: 64 for temporaries, stride for src-based planes */
277 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
278
279     if(b==15){
        /* bilinear blend of the four planes surrounding (dx,dy); the weights
         * sum to 64 */
280         int dxy = dx / 8 + dy / 8 * 4;
281         const uint8_t *src1 = hpel[dxy    ];
282         const uint8_t *src2 = hpel[dxy + 1];
283         const uint8_t *src3 = hpel[dxy + 4];
284         const uint8_t *src4 = hpel[dxy + 5];
285         int stride1 = MC_STRIDE(dxy);
286         int stride2 = MC_STRIDE(dxy + 1);
287         int stride3 = MC_STRIDE(dxy + 4);
288         int stride4 = MC_STRIDE(dxy + 5);
289         dx&=7;
290         dy&=7;
291         for(y=0; y < b_h; y++){
292             for(x=0; x < b_w; x++){
293                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
294                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
295             }
296             src1+=stride1;
297             src2+=stride2;
298             src3+=stride3;
299             src4+=stride4;
300             dst +=stride;
301         }
302     }else{
        /* two-plane blend: a/8 of hpel[l] plus (8-a)/8 of hpel[r] */
303         const uint8_t *src1= hpel[l];
304         const uint8_t *src2= hpel[r];
305         int stride1 = MC_STRIDE(l);
306         int stride2 = MC_STRIDE(r);
307         int a= weight[((dx&7) + (8*(dy&7)))];
        /* note: this b shadows the pass mask declared at function scope */
308         int b= 8-a;
309         for(y=0; y < b_h; y++){
310             for(x=0; x < b_w; x++){
311                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
312             }
313             src1+=stride1;
314             src2+=stride2;
315             dst +=stride;
316         }
317     }
318 }
319
320 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
321     if(block->type & BLOCK_INTRA){
322         int x, y;
323         const unsigned color  = block->color[plane_index];
324         const unsigned color4 = color*0x01010101;
325         if(b_w==32){
326             for(y=0; y < b_h; y++){
327                 *(uint32_t*)&dst[0 + y*stride]= color4;
328                 *(uint32_t*)&dst[4 + y*stride]= color4;
329                 *(uint32_t*)&dst[8 + y*stride]= color4;
330                 *(uint32_t*)&dst[12+ y*stride]= color4;
331                 *(uint32_t*)&dst[16+ y*stride]= color4;
332                 *(uint32_t*)&dst[20+ y*stride]= color4;
333                 *(uint32_t*)&dst[24+ y*stride]= color4;
334                 *(uint32_t*)&dst[28+ y*stride]= color4;
335             }
336         }else if(b_w==16){
337             for(y=0; y < b_h; y++){
338                 *(uint32_t*)&dst[0 + y*stride]= color4;
339                 *(uint32_t*)&dst[4 + y*stride]= color4;
340                 *(uint32_t*)&dst[8 + y*stride]= color4;
341                 *(uint32_t*)&dst[12+ y*stride]= color4;
342             }
343         }else if(b_w==8){
344             for(y=0; y < b_h; y++){
345                 *(uint32_t*)&dst[0 + y*stride]= color4;
346                 *(uint32_t*)&dst[4 + y*stride]= color4;
347             }
348         }else if(b_w==4){
349             for(y=0; y < b_h; y++){
350                 *(uint32_t*)&dst[0 + y*stride]= color4;
351             }
352         }else{
353             for(y=0; y < b_h; y++){
354                 for(x=0; x < b_w; x++){
355                     dst[x + y*stride]= color;
356                 }
357             }
358         }
359     }else{
360         uint8_t *src= s->last_picture[block->ref]->data[plane_index];
361         const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
362         int mx= block->mx*scale;
363         int my= block->my*scale;
364         const int dx= mx&15;
365         const int dy= my&15;
366         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
367         sx += (mx>>4) - (HTAPS_MAX/2-1);
368         sy += (my>>4) - (HTAPS_MAX/2-1);
369         src += sx + sy*stride;
370         if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
371            || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
372             s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
373                                      stride, stride,
374                                      b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
375                                      sx, sy, w, h);
376             src= tmp + MB_SIZE;
377         }
378
379         av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
380
381         av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
382         if(    (dx&3) || (dy&3)
383             || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
384             || (b_w&(b_w-1))
385             || b_w == 1
386             || b_h == 1
387             || !s->plane[plane_index].fast_mc )
388             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
389         else if(b_w==32){
390             int y;
391             for(y=0; y<b_h; y+=16){
392                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
393                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
394             }
395         }else if(b_w==b_h)
396             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
397         else if(b_w==2*b_h){
398             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
399             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
400         }else{
401             av_assert2(2*b_w==b_h);
402             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
403             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
404         }
405     }
406 }
407
408 #define mca(dx,dy,b_w)\
409 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
410     av_assert2(h==b_w);\
411     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
412 }
413
414 mca( 0, 0,16)
415 mca( 8, 0,16)
416 mca( 0, 8,16)
417 mca( 8, 8,16)
418 mca( 0, 0,8)
419 mca( 8, 0,8)
420 mca( 0, 8,8)
421 mca( 8, 8,8)
422
/**
 * Initialise the state shared by this file's users: DSP contexts, motion
 * compensation function tables, the ff_qexp and ff_scale_mv_ref tables, the
 * wavelet work buffers and the frame structures.
 *
 * Buffer sizes are taken from avctx->width and avctx->height as they are at
 * the time of the call.
 *
 * @return 0 on success, AVERROR(ENOMEM) if any allocation fails. Nothing is
 *         released on the failure path of this function itself.
 */
423 av_cold int ff_snow_common_init(AVCodecContext *avctx){
424     SnowContext *s = avctx->priv_data;
425     int width, height;
426     int i, j;
427
428     s->avctx= avctx;
429     s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
430
431     ff_dsputil_init(&s->dsp, avctx);
432     ff_hpeldsp_init(&s->hdsp, avctx->flags);
433     ff_videodsp_init(&s->vdsp, 8);
434     ff_dwt_init(&s->dwt);
435     ff_h264qpel_init(&s->h264qpel, 8);
436     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
437
    /* Point the rounding and no-rounding qpel tables (rows 0 and 1) at the
     * h264 qpel function with the same index dy + dx/4. */
438 #define mcf(dx,dy)\
439     s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
440     s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
441         s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
442     s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
443     s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
444         s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
445
446     mcf( 0, 0)
447     mcf( 4, 0)
448     mcf( 8, 0)
449     mcf(12, 0)
450     mcf( 0, 4)
451     mcf( 4, 4)
452     mcf( 8, 4)
453     mcf(12, 4)
454     mcf( 0, 8)
455     mcf( 4, 8)
456     mcf( 8, 8)
457     mcf(12, 8)
458     mcf( 0,12)
459     mcf( 4,12)
460     mcf( 8,12)
461     mcf(12,12)
462
    /* Install the mc_block_hpel* wrappers generated by mca() in the half-pel
     * tables: row 0 gets the 16-wide variant, row 1 the 8-wide variant. */
463 #define mcfh(dx,dy)\
464     s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
465     s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
466         mc_block_hpel ## dx ## dy ## 16;\
467     s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
468     s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
469         mc_block_hpel ## dx ## dy ## 8;
470
471     mcfh(0, 0)
472     mcfh(8, 0)
473     mcfh(0, 8)
474     mcfh(8, 8)
475
476     init_qexp();
477
478 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
479
480     width= s->avctx->width;
481     height= s->avctx->height;
482
    /* Full-picture DWT/IDWT buffers, one-row temporaries and the run buffer
     * (a quarter of the picture, rounded up). Any failure jumps to fail. */
483     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_idwt_buffer, width, height * sizeof(IDWTELEM), fail);
484     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_dwt_buffer,  width, height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
485     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_dwt_buffer,     width, sizeof(DWTELEM),  fail);
486     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_idwt_buffer,    width, sizeof(IDWTELEM), fail);
487     FF_ALLOC_ARRAY_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1), ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
488
    /* ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1); the table rows are filled in
     * the same loop that allocates the reference frame structures, so a
     * failed allocation leaves the remaining rows untouched. */
489     for(i=0; i<MAX_REF_FRAMES; i++) {
490         for(j=0; j<MAX_REF_FRAMES; j++)
491             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
492         s->last_picture[i] = av_frame_alloc();
493         if (!s->last_picture[i])
494             goto fail;
495     }
496
497     s->mconly_picture = av_frame_alloc();
498     s->current_picture = av_frame_alloc();
499     if (!s->mconly_picture || !s->current_picture)
500         goto fail;
501
502     return 0;
503 fail:
504     return AVERROR(ENOMEM);
505 }
506
507 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
508     SnowContext *s = avctx->priv_data;
509     int plane_index, level, orientation;
510     int ret, emu_buf_size;
511
512     if(!s->scratchbuf) {
513         if ((ret = ff_get_buffer(s->avctx, s->mconly_picture,
514                                  AV_GET_BUFFER_FLAG_REF)) < 0)
515             return ret;
516         FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256), 7*MB_SIZE, fail);
517         emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
518         FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
519     }
520
521     if(s->mconly_picture->format != avctx->pix_fmt) {
522         av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
523         return AVERROR_INVALIDDATA;
524     }
525
526     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
527         int w= s->avctx->width;
528         int h= s->avctx->height;
529
530         if(plane_index){
531             w>>= s->chroma_h_shift;
532             h>>= s->chroma_v_shift;
533         }
534         s->plane[plane_index].width = w;
535         s->plane[plane_index].height= h;
536
537         for(level=s->spatial_decomposition_count-1; level>=0; level--){
538             for(orientation=level ? 1 : 0; orientation<4; orientation++){
539                 SubBand *b= &s->plane[plane_index].band[level][orientation];
540
541                 b->buf= s->spatial_dwt_buffer;
542                 b->level= level;
543                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
544                 b->width = (w + !(orientation&1))>>1;
545                 b->height= (h + !(orientation>1))>>1;
546
547                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
548                 b->buf_x_offset = 0;
549                 b->buf_y_offset = 0;
550
551                 if(orientation&1){
552                     b->buf += (w+1)>>1;
553                     b->buf_x_offset = (w+1)>>1;
554                 }
555                 if(orientation>1){
556                     b->buf += b->stride>>1;
557                     b->buf_y_offset = b->stride_line >> 1;
558                 }
559                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
560
561                 if(level)
562                     b->parent= &s->plane[plane_index].band[level-1][orientation];
563                 //FIXME avoid this realloc
564                 av_freep(&b->x_coeff);
565                 b->x_coeff=av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff));
566                 if (!b->x_coeff)
567                     goto fail;
568             }
569             w= (w+1)>>1;
570             h= (h+1)>>1;
571         }
572     }
573
574     return 0;
575 fail:
576     return AVERROR(ENOMEM);
577 }
578
579 #define USE_HALFPEL_PLANE 0
580
581 static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
582     int p,x,y;
583
584     for(p=0; p < s->nb_planes; p++){
585         int is_chroma= !!p;
586         int w= is_chroma ? s->avctx->width >>s->chroma_h_shift : s->avctx->width;
587         int h= is_chroma ? s->avctx->height>>s->chroma_v_shift : s->avctx->height;
588         int ls= frame->linesize[p];
589         uint8_t *src= frame->data[p];
590
591         halfpel[1][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
592         halfpel[2][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
593         halfpel[3][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
594         if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p])
595             return AVERROR(ENOMEM);
596
597         halfpel[0][p]= src;
598         for(y=0; y<h; y++){
599             for(x=0; x<w; x++){
600                 int i= y*ls + x;
601
602                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
603             }
604         }
605         for(y=0; y<h; y++){
606             for(x=0; x<w; x++){
607                 int i= y*ls + x;
608
609                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
610             }
611         }
612         src= halfpel[1][p];
613         for(y=0; y<h; y++){
614             for(x=0; x<w; x++){
615                 int i= y*ls + x;
616
617                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
618             }
619         }
620
621 //FIXME border!
622     }
623     return 0;
624 }
625
626 void ff_snow_release_buffer(AVCodecContext *avctx)
627 {
628     SnowContext *s = avctx->priv_data;
629     int i;
630
631     if(s->last_picture[s->max_ref_frames-1]->data[0]){
632         av_frame_unref(s->last_picture[s->max_ref_frames-1]);
633         for(i=0; i<9; i++)
634             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
635                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3]));
636     }
637 }
638
639 int ff_snow_frame_start(SnowContext *s){
640    AVFrame *tmp;
641    int i, ret;
642    int w= s->avctx->width; //FIXME round up to x16 ?
643    int h= s->avctx->height;
644
645     if (s->current_picture->data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
646         s->mpvencdsp.draw_edges(s->current_picture->data[0],
647                                 s->current_picture->linesize[0], w   , h   ,
648                                 EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
649         if (s->current_picture->data[2]) {
650             s->mpvencdsp.draw_edges(s->current_picture->data[1],
651                                     s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
652                                     EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
653             s->mpvencdsp.draw_edges(s->current_picture->data[2],
654                                     s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
655                                     EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
656         }
657     }
658
659     ff_snow_release_buffer(s->avctx);
660
661     tmp= s->last_picture[s->max_ref_frames-1];
662     for(i=s->max_ref_frames-1; i>0; i--)
663         s->last_picture[i] = s->last_picture[i-1];
664     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
665     if(USE_HALFPEL_PLANE && s->current_picture->data[0]) {
666         if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0)
667             return ret;
668     }
669     s->last_picture[0] = s->current_picture;
670     s->current_picture = tmp;
671
672     if(s->keyframe){
673         s->ref_frames= 0;
674     }else{
675         int i;
676         for(i=0; i<s->max_ref_frames && s->last_picture[i]->data[0]; i++)
677             if(i && s->last_picture[i-1]->key_frame)
678                 break;
679         s->ref_frames= i;
680         if(s->ref_frames==0){
681             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
682             return -1;
683         }
684     }
685     if ((ret = ff_snow_get_buffer(s, s->current_picture)) < 0)
686         return ret;
687
688     s->current_picture->key_frame= s->keyframe;
689
690     return 0;
691 }
692
693 av_cold void ff_snow_common_end(SnowContext *s)
694 {
695     int plane_index, level, orientation, i;
696
697     av_freep(&s->spatial_dwt_buffer);
698     av_freep(&s->temp_dwt_buffer);
699     av_freep(&s->spatial_idwt_buffer);
700     av_freep(&s->temp_idwt_buffer);
701     av_freep(&s->run_buffer);
702
703     s->m.me.temp= NULL;
704     av_freep(&s->m.me.scratchpad);
705     av_freep(&s->m.me.map);
706     av_freep(&s->m.me.score_map);
707     av_freep(&s->m.obmc_scratchpad);
708
709     av_freep(&s->block);
710     av_freep(&s->scratchbuf);
711     av_freep(&s->emu_edge_buffer);
712
713     for(i=0; i<MAX_REF_FRAMES; i++){
714         av_freep(&s->ref_mvs[i]);
715         av_freep(&s->ref_scores[i]);
716         if(s->last_picture[i]->data[0]) {
717             av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
718         }
719         av_frame_free(&s->last_picture[i]);
720     }
721
722     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
723         for(level=s->spatial_decomposition_count-1; level>=0; level--){
724             for(orientation=level ? 1 : 0; orientation<4; orientation++){
725                 SubBand *b= &s->plane[plane_index].band[level][orientation];
726
727                 av_freep(&b->x_coeff);
728             }
729         }
730     }
731     av_frame_free(&s->mconly_picture);
732     av_frame_free(&s->current_picture);
733 }