]> git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c
avcodec/dvdsubdec: Fix off-by-one error
[ffmpeg] / libavcodec / snow.c
1 /*
2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "dsputil.h"
26 #include "snow_dwt.h"
27 #include "internal.h"
28 #include "snow.h"
29 #include "snowdata.h"
30
31 #include "rangecoder.h"
32 #include "mathops.h"
33 #include "h263.h"
34
35
36 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
37                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
38     int y, x;
39     IDWTELEM * dst;
40     for(y=0; y<b_h; y++){
41         //FIXME ugly misuse of obmc_stride
42         const uint8_t *obmc1= obmc + y*obmc_stride;
43         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
44         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
45         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
46         dst = slice_buffer_get_line(sb, src_y + y);
47         for(x=0; x<b_w; x++){
48             int v=   obmc1[x] * block[3][x + y*src_stride]
49                     +obmc2[x] * block[2][x + y*src_stride]
50                     +obmc3[x] * block[1][x + y*src_stride]
51                     +obmc4[x] * block[0][x + y*src_stride];
52
53             v <<= 8 - LOG2_OBMC_MAX;
54             if(FRAC_BITS != 8){
55                 v >>= 8 - FRAC_BITS;
56             }
57             if(add){
58                 v += dst[x + src_x];
59                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
60                 if(v&(~255)) v= ~(v>>31);
61                 dst8[x + y*src_stride] = v;
62             }else{
63                 dst[x + src_x] -= v;
64             }
65         }
66     }
67 }
68
69 int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
70 {
71     int ret, i;
72
73     frame->width  = s->avctx->width  + 2 * EDGE_WIDTH;
74     frame->height = s->avctx->height + 2 * EDGE_WIDTH;
75     if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
76         return ret;
77     for (i = 0; frame->data[i]; i++) {
78         int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) *
79                         frame->linesize[i] +
80                         (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0));
81         frame->data[i] += offset;
82     }
83     frame->width  = s->avctx->width;
84     frame->height = s->avctx->height;
85
86     return 0;
87 }
88
89 void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
90     int plane_index, level, orientation;
91
92     for(plane_index=0; plane_index<3; plane_index++){
93         for(level=0; level<MAX_DECOMPOSITIONS; level++){
94             for(orientation=level ? 1:0; orientation<4; orientation++){
95                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
96             }
97         }
98     }
99     memset(s->header_state, MID_STATE, sizeof(s->header_state));
100     memset(s->block_state, MID_STATE, sizeof(s->block_state));
101 }
102
103 int ff_snow_alloc_blocks(SnowContext *s){
104     int w= FF_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
105     int h= FF_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
106
107     s->b_width = w;
108     s->b_height= h;
109
110     av_free(s->block);
111     s->block= av_mallocz_array(w * h,  sizeof(BlockNode) << (s->block_max_depth*2));
112     if (!s->block)
113         return AVERROR(ENOMEM);
114
115     return 0;
116 }
117
118 static av_cold void init_qexp(void){
119     int i;
120     double v=128;
121
122     for(i=0; i<QROOT; i++){
123         ff_qexp[i]= lrintf(v);
124         v *= pow(2, 1.0 / QROOT);
125     }
126 }
/*
 * Sub-pel motion compensation of one b_w x b_h block.
 *
 * p      plane parameters (hcoeff, fast_mc, diag_mc). May be NULL (the
 *        mc_block_hpel* wrappers pass NULL); then the fixed (20,-5,1) filter
 *        is used and the diag_mc override is skipped.
 * dst    output block, rows are `stride` bytes apart.
 * src    reference pixels with the same stride. The filters read up to
 *        HTAPS_MAX-1 extra rows/columns starting at src[0]; the callers in
 *        this file pass a pointer moved back by HTAPS_MAX/2-1 in x and y.
 * dx,dy  fractional position, asserted to be < 16.
 *
 * Up to three filtered planes are built in 64-byte-wide temporaries and the
 * result is either a weighted blend of two planes or, when b == 15, a
 * bilinear blend of four.
 */
127 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
    /* Weight (out of 8) of the first blended plane, indexed by
     * (dx&7) + 8*(dy&7); the second plane gets 8 minus this value. */
128     static const uint8_t weight[64]={
129     8,7,6,5,4,3,2,1,
130     7,7,0,0,0,0,0,1,
131     6,0,6,0,0,0,2,0,
132     5,0,0,5,0,3,0,0,
133     4,0,0,0,4,0,0,0,
134     3,0,0,5,0,3,0,0,
135     2,0,6,0,0,0,2,0,
136     1,7,0,0,0,0,0,1,
137     };
138
    /* Indexed by dx + 16*dy. High nibble (l) and low nibble (r) are indices
     * into hpel[] / needs[] for the two planes to blend. 0xcc yields index
     * 12, whose needs[] entry is 15, i.e. it selects the bilinear path. */
139     static const uint8_t brane[256]={
140     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
141     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
142     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
143     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
144     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
145     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
146     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
147     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
148     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
149     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
150     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
151     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
152     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
153     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
154     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
155     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
156     };
157
    /* Per hpel[] index: bit mask of the filter passes needed to produce that
     * plane. 1 = horizontal pass, 2 = vertical pass, 4 = vertical pass over
     * the horizontal intermediate; 15 forces the bilinear path. Entries not
     * listed (13..15) are implicitly 0. */
158     static const uint8_t needs[16]={
159     0,1,0,0,
160     2,4,2,0,
161     0,1,0,0,
162     15
163     };
164
165     int x, y, b, r, l;
    /* tmpIt: unrounded horizontal results; tmp2t[0..2]: clipped 8-bit output
     * of the horizontal, vertical and combined passes. All use a 64 pitch. */
166     int16_t tmpIt   [64*(32+HTAPS_MAX)];
167     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
168     int16_t *tmpI= tmpIt;
169     uint8_t *tmp2= tmp2t[0];
    /* candidate planes; indices 3 and 7 are never assigned */
170     const uint8_t *hpel[11];
171     av_assert2(dx<16 && dy<16);
172     r= brane[dx + 16*dy]&15;
173     l= brane[dx + 16*dy]>>4;
174
175     b= needs[l] | needs[r];
    /* without diag_mc every pass runs and the bilinear path below is taken */
176     if(p && !p->diag_mc)
177         b= 15;
178
    /* Pass 1: horizontal filter over b_h+HTAPS_MAX-1 rows. The unrounded sum
     * is kept in tmpI for pass 3, the rounded and clipped value in tmp2t[0]. */
179     if(b&5){
180         for(y=0; y < b_h+HTAPS_MAX-1; y++){
181             for(x=0; x < b_w; x++){
182                 int a_1=src[x + HTAPS_MAX/2-4];
183                 int a0= src[x + HTAPS_MAX/2-3];
184                 int a1= src[x + HTAPS_MAX/2-2];
185                 int a2= src[x + HTAPS_MAX/2-1];
186                 int a3= src[x + HTAPS_MAX/2+0];
187                 int a4= src[x + HTAPS_MAX/2+1];
188                 int a5= src[x + HTAPS_MAX/2+2];
189                 int a6= src[x + HTAPS_MAX/2+3];
190                 int am=0;
191                 if(!p || p->fast_mc){
192                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
193                     tmpI[x]= am;
194                     am= (am+16)>>5;
195                 }else{
196                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
197                     tmpI[x]= am;
198                     am= (am+32)>>6;
199                 }
200
                /* clip: negative -> 0, above 255 -> all ones (255 as uint8_t) */
201                 if(am&(~255)) am= ~(am>>31);
202                 tmp2[x]= am;
203             }
204             tmpI+= 64;
205             tmp2+= 64;
206             src += stride;
207         }
        /* y holds the number of rows processed; rewind src to its start */
208         src -= stride*y;
209     }
210     src += HTAPS_MAX/2 - 1;
211     tmp2= tmp2t[1];
212
    /* Pass 2: vertical filter straight from the source into tmp2t[1]; one
     * extra column is produced because hpel[6] is tmp2t[1] + 1. */
213     if(b&2){
214         for(y=0; y < b_h; y++){
215             for(x=0; x < b_w+1; x++){
216                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
217                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
218                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
219                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
220                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
221                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
222                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
223                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
224                 int am=0;
225                 if(!p || p->fast_mc)
226                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
227                 else
228                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
229
230                 if(am&(~255)) am= ~(am>>31);
231                 tmp2[x]= am;
232             }
233             src += stride;
234             tmp2+= 64;
235         }
236         src -= stride*y;
237     }
238     src += stride*(HTAPS_MAX/2 - 1);
239     tmp2= tmp2t[2];
240     tmpI= tmpIt;
    /* Pass 3: vertical filter over the unrounded horizontal results; the
     * shift (10 or 12) removes the scaling of both passes at once. */
241     if(b&4){
242         for(y=0; y < b_h; y++){
243             for(x=0; x < b_w; x++){
244                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
245                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
246                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
247                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
248                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
249                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
250                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
251                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
252                 int am=0;
253                 if(!p || p->fast_mc)
254                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
255                 else
256                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
257                 if(am&(~255)) am= ~(am>>31);
258                 tmp2[x]= am;
259             }
260             tmpI+= 64;
261             tmp2+= 64;
262         }
263     }
264
    /* Candidate planes laid out as a grid with a pitch of 4 entries: source
     * pixels at 0/2/8/10, horizontally filtered at 1/9, vertically filtered
     * at 4/6, filtered in both directions at 5. */
265     hpel[ 0]= src;
266     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
267     hpel[ 2]= src + 1;
268
269     hpel[ 4]= tmp2t[1];
270     hpel[ 5]= tmp2t[2];
271     hpel[ 6]= tmp2t[1] + 1;
272
273     hpel[ 8]= src + stride;
274     hpel[ 9]= hpel[1] + 64;
275     hpel[10]= hpel[8] + 1;
276
/* planes with a non-zero needs[] entry live in the 64-wide temporaries,
 * the others in the source picture */
277 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
278
279     if(b==15){
        /* bilinear blend of the four planes surrounding the position, with
         * weights from the low three bits of dx and dy (sum of weights 64) */
280         int dxy = dx / 8 + dy / 8 * 4;
281         const uint8_t *src1 = hpel[dxy    ];
282         const uint8_t *src2 = hpel[dxy + 1];
283         const uint8_t *src3 = hpel[dxy + 4];
284         const uint8_t *src4 = hpel[dxy + 5];
285         int stride1 = MC_STRIDE(dxy);
286         int stride2 = MC_STRIDE(dxy + 1);
287         int stride3 = MC_STRIDE(dxy + 4);
288         int stride4 = MC_STRIDE(dxy + 5);
289         dx&=7;
290         dy&=7;
291         for(y=0; y < b_h; y++){
292             for(x=0; x < b_w; x++){
293                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
294                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
295             }
296             src1+=stride1;
297             src2+=stride2;
298             src3+=stride3;
299             src4+=stride4;
300             dst +=stride;
301         }
302     }else{
        /* two-plane blend with weights a and 8-a; note that the local `b`
         * declared here shadows the pass mask `b` of the outer scope */
303         const uint8_t *src1= hpel[l];
304         const uint8_t *src2= hpel[r];
305         int stride1 = MC_STRIDE(l);
306         int stride2 = MC_STRIDE(r);
307         int a= weight[((dx&7) + (8*(dy&7)))];
308         int b= 8-a;
309         for(y=0; y < b_h; y++){
310             for(x=0; x < b_w; x++){
311                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
312             }
313             src1+=stride1;
314             src2+=stride2;
315             dst +=stride;
316         }
317     }
318 }
319
320 void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
321     if(block->type & BLOCK_INTRA){
322         int x, y;
323         const unsigned color  = block->color[plane_index];
324         const unsigned color4 = color*0x01010101;
325         if(b_w==32){
326             for(y=0; y < b_h; y++){
327                 *(uint32_t*)&dst[0 + y*stride]= color4;
328                 *(uint32_t*)&dst[4 + y*stride]= color4;
329                 *(uint32_t*)&dst[8 + y*stride]= color4;
330                 *(uint32_t*)&dst[12+ y*stride]= color4;
331                 *(uint32_t*)&dst[16+ y*stride]= color4;
332                 *(uint32_t*)&dst[20+ y*stride]= color4;
333                 *(uint32_t*)&dst[24+ y*stride]= color4;
334                 *(uint32_t*)&dst[28+ y*stride]= color4;
335             }
336         }else if(b_w==16){
337             for(y=0; y < b_h; y++){
338                 *(uint32_t*)&dst[0 + y*stride]= color4;
339                 *(uint32_t*)&dst[4 + y*stride]= color4;
340                 *(uint32_t*)&dst[8 + y*stride]= color4;
341                 *(uint32_t*)&dst[12+ y*stride]= color4;
342             }
343         }else if(b_w==8){
344             for(y=0; y < b_h; y++){
345                 *(uint32_t*)&dst[0 + y*stride]= color4;
346                 *(uint32_t*)&dst[4 + y*stride]= color4;
347             }
348         }else if(b_w==4){
349             for(y=0; y < b_h; y++){
350                 *(uint32_t*)&dst[0 + y*stride]= color4;
351             }
352         }else{
353             for(y=0; y < b_h; y++){
354                 for(x=0; x < b_w; x++){
355                     dst[x + y*stride]= color;
356                 }
357             }
358         }
359     }else{
360         uint8_t *src= s->last_picture[block->ref]->data[plane_index];
361         const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
362         int mx= block->mx*scale;
363         int my= block->my*scale;
364         const int dx= mx&15;
365         const int dy= my&15;
366         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
367         sx += (mx>>4) - (HTAPS_MAX/2-1);
368         sy += (my>>4) - (HTAPS_MAX/2-1);
369         src += sx + sy*stride;
370         if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
371            || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
372             s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
373                                      stride, stride,
374                                      b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
375                                      sx, sy, w, h);
376             src= tmp + MB_SIZE;
377         }
378
379         av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
380
381         av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
382         if(    (dx&3) || (dy&3)
383             || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
384             || (b_w&(b_w-1))
385             || b_w == 1
386             || b_h == 1
387             || !s->plane[plane_index].fast_mc )
388             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
389         else if(b_w==32){
390             int y;
391             for(y=0; y<b_h; y+=16){
392                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
393                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
394             }
395         }else if(b_w==b_h)
396             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
397         else if(b_w==2*b_h){
398             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
399             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
400         }else{
401             av_assert2(2*b_w==b_h);
402             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
403             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
404         }
405     }
406 }
407
408 #define mca(dx,dy,b_w)\
409 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
410     av_assert2(h==b_w);\
411     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
412 }
413
414 mca( 0, 0,16)
415 mca( 8, 0,16)
416 mca( 0, 8,16)
417 mca( 8, 8,16)
418 mca( 0, 0,8)
419 mca( 8, 0,8)
420 mca( 0, 8,8)
421 mca( 8, 8,8)
422
423 av_cold int ff_snow_common_init(AVCodecContext *avctx){
424     SnowContext *s = avctx->priv_data;
425     int width, height;
426     int i, j;
427
428     s->avctx= avctx;
429     s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
430
431     ff_dsputil_init(&s->dsp, avctx);
432     ff_hpeldsp_init(&s->hdsp, avctx->flags);
433     ff_videodsp_init(&s->vdsp, 8);
434     ff_dwt_init(&s->dwt);
435     ff_h264qpel_init(&s->h264qpel, 8);
436
437 #define mcf(dx,dy)\
438     s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
439     s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
440         s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
441     s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
442     s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
443         s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
444
445     mcf( 0, 0)
446     mcf( 4, 0)
447     mcf( 8, 0)
448     mcf(12, 0)
449     mcf( 0, 4)
450     mcf( 4, 4)
451     mcf( 8, 4)
452     mcf(12, 4)
453     mcf( 0, 8)
454     mcf( 4, 8)
455     mcf( 8, 8)
456     mcf(12, 8)
457     mcf( 0,12)
458     mcf( 4,12)
459     mcf( 8,12)
460     mcf(12,12)
461
462 #define mcfh(dx,dy)\
463     s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
464     s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
465         mc_block_hpel ## dx ## dy ## 16;\
466     s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
467     s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
468         mc_block_hpel ## dx ## dy ## 8;
469
470     mcfh(0, 0)
471     mcfh(8, 0)
472     mcfh(0, 8)
473     mcfh(8, 8)
474
475     init_qexp();
476
477 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
478
479     width= s->avctx->width;
480     height= s->avctx->height;
481
482     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_idwt_buffer, width, height * sizeof(IDWTELEM), fail);
483     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_dwt_buffer,  width, height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
484     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_dwt_buffer,     width, sizeof(DWTELEM),  fail);
485     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_idwt_buffer,    width, sizeof(IDWTELEM), fail);
486     FF_ALLOC_ARRAY_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1), ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
487
488     for(i=0; i<MAX_REF_FRAMES; i++) {
489         for(j=0; j<MAX_REF_FRAMES; j++)
490             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
491         s->last_picture[i] = av_frame_alloc();
492         if (!s->last_picture[i])
493             goto fail;
494     }
495
496     s->mconly_picture = av_frame_alloc();
497     s->current_picture = av_frame_alloc();
498     if (!s->mconly_picture || !s->current_picture)
499         goto fail;
500
501     return 0;
502 fail:
503     return AVERROR(ENOMEM);
504 }
505
506 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
507     SnowContext *s = avctx->priv_data;
508     int plane_index, level, orientation;
509     int ret, emu_buf_size;
510
511     if(!s->scratchbuf) {
512         if ((ret = ff_get_buffer(s->avctx, s->mconly_picture,
513                                  AV_GET_BUFFER_FLAG_REF)) < 0)
514             return ret;
515         FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256), 7*MB_SIZE, fail);
516         emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
517         FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
518     }
519
520     if(s->mconly_picture->format != avctx->pix_fmt) {
521         av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
522         return AVERROR_INVALIDDATA;
523     }
524
525     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
526         int w= s->avctx->width;
527         int h= s->avctx->height;
528
529         if(plane_index){
530             w>>= s->chroma_h_shift;
531             h>>= s->chroma_v_shift;
532         }
533         s->plane[plane_index].width = w;
534         s->plane[plane_index].height= h;
535
536         for(level=s->spatial_decomposition_count-1; level>=0; level--){
537             for(orientation=level ? 1 : 0; orientation<4; orientation++){
538                 SubBand *b= &s->plane[plane_index].band[level][orientation];
539
540                 b->buf= s->spatial_dwt_buffer;
541                 b->level= level;
542                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
543                 b->width = (w + !(orientation&1))>>1;
544                 b->height= (h + !(orientation>1))>>1;
545
546                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
547                 b->buf_x_offset = 0;
548                 b->buf_y_offset = 0;
549
550                 if(orientation&1){
551                     b->buf += (w+1)>>1;
552                     b->buf_x_offset = (w+1)>>1;
553                 }
554                 if(orientation>1){
555                     b->buf += b->stride>>1;
556                     b->buf_y_offset = b->stride_line >> 1;
557                 }
558                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
559
560                 if(level)
561                     b->parent= &s->plane[plane_index].band[level-1][orientation];
562                 //FIXME avoid this realloc
563                 av_freep(&b->x_coeff);
564                 b->x_coeff=av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff));
565                 if (!b->x_coeff)
566                     goto fail;
567             }
568             w= (w+1)>>1;
569             h= (h+1)>>1;
570         }
571     }
572
573     return 0;
574 fail:
575     return AVERROR(ENOMEM);
576 }
577
578 #define USE_HALFPEL_PLANE 0
579
580 static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
581     int p,x,y;
582
583     for(p=0; p < s->nb_planes; p++){
584         int is_chroma= !!p;
585         int w= is_chroma ? s->avctx->width >>s->chroma_h_shift : s->avctx->width;
586         int h= is_chroma ? s->avctx->height>>s->chroma_v_shift : s->avctx->height;
587         int ls= frame->linesize[p];
588         uint8_t *src= frame->data[p];
589
590         halfpel[1][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
591         halfpel[2][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
592         halfpel[3][p] = (uint8_t*) av_malloc(ls * (h + 2 * EDGE_WIDTH)) + EDGE_WIDTH * (1 + ls);
593         if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p])
594             return AVERROR(ENOMEM);
595
596         halfpel[0][p]= src;
597         for(y=0; y<h; y++){
598             for(x=0; x<w; x++){
599                 int i= y*ls + x;
600
601                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
602             }
603         }
604         for(y=0; y<h; y++){
605             for(x=0; x<w; x++){
606                 int i= y*ls + x;
607
608                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
609             }
610         }
611         src= halfpel[1][p];
612         for(y=0; y<h; y++){
613             for(x=0; x<w; x++){
614                 int i= y*ls + x;
615
616                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
617             }
618         }
619
620 //FIXME border!
621     }
622     return 0;
623 }
624
625 void ff_snow_release_buffer(AVCodecContext *avctx)
626 {
627     SnowContext *s = avctx->priv_data;
628     int i;
629
630     if(s->last_picture[s->max_ref_frames-1]->data[0]){
631         av_frame_unref(s->last_picture[s->max_ref_frames-1]);
632         for(i=0; i<9; i++)
633             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
634                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3]));
635     }
636 }
637
638 int ff_snow_frame_start(SnowContext *s){
639    AVFrame *tmp;
640    int i, ret;
641    int w= s->avctx->width; //FIXME round up to x16 ?
642    int h= s->avctx->height;
643
644     if (s->current_picture->data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
645         s->dsp.draw_edges(s->current_picture->data[0],
646                           s->current_picture->linesize[0], w   , h   ,
647                           EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
648         if (s->current_picture->data[2]) {
649             s->dsp.draw_edges(s->current_picture->data[1],
650                             s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
651                             EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
652             s->dsp.draw_edges(s->current_picture->data[2],
653                             s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
654                             EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
655         }
656     }
657
658     ff_snow_release_buffer(s->avctx);
659
660     tmp= s->last_picture[s->max_ref_frames-1];
661     for(i=s->max_ref_frames-1; i>0; i--)
662         s->last_picture[i] = s->last_picture[i-1];
663     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
664     if(USE_HALFPEL_PLANE && s->current_picture->data[0]) {
665         if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0)
666             return ret;
667     }
668     s->last_picture[0] = s->current_picture;
669     s->current_picture = tmp;
670
671     if(s->keyframe){
672         s->ref_frames= 0;
673     }else{
674         int i;
675         for(i=0; i<s->max_ref_frames && s->last_picture[i]->data[0]; i++)
676             if(i && s->last_picture[i-1]->key_frame)
677                 break;
678         s->ref_frames= i;
679         if(s->ref_frames==0){
680             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
681             return -1;
682         }
683     }
684     if ((ret = ff_snow_get_buffer(s, s->current_picture)) < 0)
685         return ret;
686
687     s->current_picture->key_frame= s->keyframe;
688
689     return 0;
690 }
691
692 av_cold void ff_snow_common_end(SnowContext *s)
693 {
694     int plane_index, level, orientation, i;
695
696     av_freep(&s->spatial_dwt_buffer);
697     av_freep(&s->temp_dwt_buffer);
698     av_freep(&s->spatial_idwt_buffer);
699     av_freep(&s->temp_idwt_buffer);
700     av_freep(&s->run_buffer);
701
702     s->m.me.temp= NULL;
703     av_freep(&s->m.me.scratchpad);
704     av_freep(&s->m.me.map);
705     av_freep(&s->m.me.score_map);
706     av_freep(&s->m.obmc_scratchpad);
707
708     av_freep(&s->block);
709     av_freep(&s->scratchbuf);
710     av_freep(&s->emu_edge_buffer);
711
712     for(i=0; i<MAX_REF_FRAMES; i++){
713         av_freep(&s->ref_mvs[i]);
714         av_freep(&s->ref_scores[i]);
715         if(s->last_picture[i]->data[0]) {
716             av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
717         }
718         av_frame_free(&s->last_picture[i]);
719     }
720
721     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
722         for(level=s->spatial_decomposition_count-1; level>=0; level--){
723             for(orientation=level ? 1 : 0; orientation<4; orientation++){
724                 SubBand *b= &s->plane[plane_index].band[level][orientation];
725
726                 av_freep(&b->x_coeff);
727             }
728         }
729     }
730     av_frame_free(&s->mconly_picture);
731     av_frame_free(&s->current_picture);
732 }