2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
29 #include "rangecoder.h"
32 #include "mpegvideo.h"
38 static const int8_t quant3bA[256]={
39 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
40 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
41 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
42 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
43 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
44 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
45 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
46 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
47 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
48 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
49 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
50 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
51 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
52 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
53 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
54 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
57 static const uint8_t obmc32[1024]={
58 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
59 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
60 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
61 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
62 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
63 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
64 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
65 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
66 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
67 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
68 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
69 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
70 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
71 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
72 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
73 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
74 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
75 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
76 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
77 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
78 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
79 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
80 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
81 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
82 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
83 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
84 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
85 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
86 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
87 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
88 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
89 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
92 static const uint8_t obmc16[256]={
93 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
94 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
95 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
96 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
97 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
98 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
99 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
100 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
101 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
102 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
103 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
104 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
105 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
106 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
107 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
108 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
113 static const uint8_t obmc8[64]={
114 4, 12, 20, 28, 28, 20, 12, 4,
115 12, 36, 60, 84, 84, 60, 36, 12,
116 20, 60,100,140,140,100, 60, 20,
117 28, 84,140,196,196,140, 84, 28,
118 28, 84,140,196,196,140, 84, 28,
119 20, 60,100,140,140,100, 60, 20,
120 12, 36, 60, 84, 84, 60, 36, 12,
121 4, 12, 20, 28, 28, 20, 12, 4,
126 static const uint8_t obmc4[16]={
134 static const uint8_t * const obmc_tab[4]={
135 obmc32, obmc16, obmc8, obmc4
138 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
140 typedef struct BlockNode{
146 //#define TYPE_SPLIT 1
147 #define BLOCK_INTRA 1
149 //#define TYPE_NOCOLOR 4
150 uint8_t level; //FIXME merge into type?
153 static const BlockNode null_block= { //FIXME add border maybe
154 .color= {128,128,128},
162 #define LOG2_MB_SIZE 4
163 #define MB_SIZE (1<<LOG2_MB_SIZE)
164 #define ENCODER_EXTRA_BITS 4
167 typedef struct x_and_coeff{
172 typedef struct SubBand{
177 int qlog; ///< log(qscale)/log[2^(1/6)]
182 int stride_line; ///< Stride measured in lines, not pixels.
183 x_and_coeff * x_coeff;
184 struct SubBand *parent;
185 uint8_t state[/*7*2*/ 7 + 512][32];
188 typedef struct Plane{
191 SubBand band[MAX_DECOMPOSITIONS][4];
194 int8_t hcoeff[HTAPS_MAX/2];
199 int8_t last_hcoeff[HTAPS_MAX/2];
203 typedef struct SnowContext{
205 AVCodecContext *avctx;
210 AVFrame input_picture; ///< new_picture with the internal linesizes
211 AVFrame current_picture;
212 AVFrame last_picture[MAX_REF_FRAMES];
213 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
214 AVFrame mconly_picture;
215 // uint8_t q_context[16];
216 uint8_t header_state[32];
217 uint8_t block_state[128 + 32*128];
221 int spatial_decomposition_type;
222 int last_spatial_decomposition_type;
223 int temporal_decomposition_type;
224 int spatial_decomposition_count;
225 int last_spatial_decomposition_count;
226 int temporal_decomposition_count;
229 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
230 uint32_t *ref_scores[MAX_REF_FRAMES];
231 DWTELEM *spatial_dwt_buffer;
232 IDWTELEM *spatial_idwt_buffer;
236 int spatial_scalability;
246 #define QBIAS_SHIFT 3
250 int last_block_max_depth;
251 Plane plane[MAX_PLANES];
253 #define ME_CACHE_SIZE 1024
254 int me_cache[ME_CACHE_SIZE];
255 int me_cache_generation;
259 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
265 // Avoid a name clash on SGI IRIX
268 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
269 static uint8_t qexp[QROOT];
271 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
275 const int a= FFABS(v);
276 const int e= av_log2(a);
277 const int el= FFMIN(e, 10);
278 put_rac(c, state+0, 0);
281 put_rac(c, state+1+i, 1); //1..10
284 put_rac(c, state+1+9, 1); //1..10
286 put_rac(c, state+1+FFMIN(i,9), 0);
288 for(i=e-1; i>=el; i--){
289 put_rac(c, state+22+9, (a>>i)&1); //22..31
292 put_rac(c, state+22+i, (a>>i)&1); //22..31
296 put_rac(c, state+11 + el, v < 0); //11..21
298 put_rac(c, state+0, 1);
302 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
303 if(get_rac(c, state+0))
308 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
313 for(i=e-1; i>=0; i--){
314 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
317 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
322 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
324 int r= log2>=0 ? 1<<log2 : 1;
330 put_rac(c, state+4+log2, 1);
335 put_rac(c, state+4+log2, 0);
337 for(i=log2-1; i>=0; i--){
338 put_rac(c, state+31-i, (v>>i)&1);
342 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
344 int r= log2>=0 ? 1<<log2 : 1;
349 while(get_rac(c, state+4+log2)){
355 for(i=log2-1; i>=0; i--){
356 v+= get_rac(c, state+31-i)<<i;
362 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
363 const int w= b->width;
364 const int h= b->height;
368 x_and_coeff *xc= b->x_coeff;
369 x_and_coeff *prev_xc= NULL;
370 x_and_coeff *prev2_xc= xc;
371 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
372 x_and_coeff *prev_parent_xc= parent_xc;
374 runs= get_symbol2(&s->c, b->state[30], 0);
375 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
382 if(y && prev_xc->x == 0){
394 if(prev_xc->x == x + 1)
400 if(x>>1 > parent_xc->x){
403 if(x>>1 == parent_xc->x){
407 if(/*ll|*/l|lt|t|rt|p){
408 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
410 v=get_rac(&s->c, &b->state[0][context]);
412 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
413 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
420 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
422 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
423 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
432 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
433 else max_run= FFMIN(run, w-x-1);
435 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
441 (xc++)->x= w+1; //end marker
447 while(parent_xc->x != parent->width+1)
450 prev_parent_xc= parent_xc;
452 parent_xc= prev_parent_xc;
457 (xc++)->x= w+1; //end marker
460 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
461 const int w= b->width;
463 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
464 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
465 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
468 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
473 /* If we are on the second or later slice, restore our index. */
475 new_index = save_state[0];
478 for(y=start_y; y<h; y++){
481 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
482 memset(line, 0, b->width*sizeof(IDWTELEM));
483 v = b->x_coeff[new_index].coeff;
484 x = b->x_coeff[new_index++].x;
486 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
487 register int u= -(v&1);
490 v = b->x_coeff[new_index].coeff;
491 x = b->x_coeff[new_index++].x;
495 /* Save our variables for the next slice. */
496 save_state[0] = new_index;
501 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
502 int plane_index, level, orientation;
504 for(plane_index=0; plane_index<3; plane_index++){
505 for(level=0; level<MAX_DECOMPOSITIONS; level++){
506 for(orientation=level ? 1:0; orientation<4; orientation++){
507 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
511 memset(s->header_state, MID_STATE, sizeof(s->header_state));
512 memset(s->block_state, MID_STATE, sizeof(s->block_state));
515 static int alloc_blocks(SnowContext *s){
516 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
517 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
523 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
527 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
528 const int w= s->b_width << s->block_max_depth;
529 const int rem_depth= s->block_max_depth - level;
530 const int index= (x + y*w) << rem_depth;
531 const int block_w= 1<<rem_depth;
544 for(j=0; j<block_w; j++){
545 for(i=0; i<block_w; i++){
546 s->block[index + i + j*w]= block;
551 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
552 const int offset[3]= {
554 ((y*c->uvstride + x)>>1),
555 ((y*c->uvstride + x)>>1),
559 c->src[0][i]= src [i];
560 c->ref[0][i]= ref [i] + offset[i];
565 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
566 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
567 if(s->ref_frames == 1){
568 *mx = mid_pred(left->mx, top->mx, tr->mx);
569 *my = mid_pred(left->my, top->my, tr->my);
571 const int *scale = scale_mv_ref[ref];
572 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
573 (top ->mx * scale[top ->ref] + 128) >>8,
574 (tr ->mx * scale[tr ->ref] + 128) >>8);
575 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
576 (top ->my * scale[top ->ref] + 128) >>8,
577 (tr ->my * scale[tr ->ref] + 128) >>8);
581 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
582 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
583 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
585 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
589 static void decode_q_branch(SnowContext *s, int level, int x, int y){
590 const int w= s->b_width << s->block_max_depth;
591 const int rem_depth= s->block_max_depth - level;
592 const int index= (x + y*w) << rem_depth;
593 int trx= (x+1)<<rem_depth;
594 const BlockNode *left = x ? &s->block[index-1] : &null_block;
595 const BlockNode *top = y ? &s->block[index-w] : &null_block;
596 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
597 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
598 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
601 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
605 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
607 int l = left->color[0];
608 int cb= left->color[1];
609 int cr= left->color[2];
611 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
612 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
613 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
615 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
618 pred_mv(s, &mx, &my, 0, left, top, tr);
619 l += get_symbol(&s->c, &s->block_state[32], 1);
620 cb+= get_symbol(&s->c, &s->block_state[64], 1);
621 cr+= get_symbol(&s->c, &s->block_state[96], 1);
623 if(s->ref_frames > 1)
624 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
625 pred_mv(s, &mx, &my, ref, left, top, tr);
626 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
627 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
629 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
631 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
632 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
633 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
634 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
638 static void decode_blocks(SnowContext *s){
645 decode_q_branch(s, 0, x, y);
650 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
651 static const uint8_t weight[64]={
662 static const uint8_t brane[256]={
663 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
664 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
665 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
666 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
667 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
668 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
669 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
670 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
671 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
672 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
673 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
674 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
675 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
676 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
677 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
678 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
681 static const uint8_t needs[16]={
689 int16_t tmpIt [64*(32+HTAPS_MAX)];
690 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
691 int16_t *tmpI= tmpIt;
692 uint8_t *tmp2= tmp2t[0];
693 const uint8_t *hpel[11];
694 assert(dx<16 && dy<16);
695 r= brane[dx + 16*dy]&15;
696 l= brane[dx + 16*dy]>>4;
698 b= needs[l] | needs[r];
703 for(y=0; y < b_h+HTAPS_MAX-1; y++){
704 for(x=0; x < b_w; x++){
705 int a_1=src[x + HTAPS_MAX/2-4];
706 int a0= src[x + HTAPS_MAX/2-3];
707 int a1= src[x + HTAPS_MAX/2-2];
708 int a2= src[x + HTAPS_MAX/2-1];
709 int a3= src[x + HTAPS_MAX/2+0];
710 int a4= src[x + HTAPS_MAX/2+1];
711 int a5= src[x + HTAPS_MAX/2+2];
712 int a6= src[x + HTAPS_MAX/2+3];
714 if(!p || p->fast_mc){
715 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
719 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
724 if(am&(~255)) am= ~(am>>31);
733 src += HTAPS_MAX/2 - 1;
737 for(y=0; y < b_h; y++){
738 for(x=0; x < b_w+1; x++){
739 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
740 int a0= src[x + (HTAPS_MAX/2-3)*stride];
741 int a1= src[x + (HTAPS_MAX/2-2)*stride];
742 int a2= src[x + (HTAPS_MAX/2-1)*stride];
743 int a3= src[x + (HTAPS_MAX/2+0)*stride];
744 int a4= src[x + (HTAPS_MAX/2+1)*stride];
745 int a5= src[x + (HTAPS_MAX/2+2)*stride];
746 int a6= src[x + (HTAPS_MAX/2+3)*stride];
749 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
751 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
753 if(am&(~255)) am= ~(am>>31);
761 src += stride*(HTAPS_MAX/2 - 1);
765 for(y=0; y < b_h; y++){
766 for(x=0; x < b_w; x++){
767 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
768 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
769 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
770 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
771 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
772 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
773 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
774 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
777 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
779 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
780 if(am&(~255)) am= ~(am>>31);
789 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
794 hpel[ 6]= tmp2t[1] + 1;
796 hpel[ 8]= src + stride;
797 hpel[ 9]= hpel[1] + stride;
798 hpel[10]= hpel[8] + 1;
801 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
802 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
803 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
804 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
807 for(y=0; y < b_h; y++){
808 for(x=0; x < b_w; x++){
809 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
810 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
819 const uint8_t *src1= hpel[l];
820 const uint8_t *src2= hpel[r];
821 int a= weight[((dx&7) + (8*(dy&7)))];
823 for(y=0; y < b_h; y++){
824 for(x=0; x < b_w; x++){
825 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
834 #define mca(dx,dy,b_w)\
835 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
837 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
849 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
850 if(block->type & BLOCK_INTRA){
852 const int color = block->color[plane_index];
853 const int color4= color*0x01010101;
855 for(y=0; y < b_h; y++){
856 *(uint32_t*)&dst[0 + y*stride]= color4;
857 *(uint32_t*)&dst[4 + y*stride]= color4;
858 *(uint32_t*)&dst[8 + y*stride]= color4;
859 *(uint32_t*)&dst[12+ y*stride]= color4;
860 *(uint32_t*)&dst[16+ y*stride]= color4;
861 *(uint32_t*)&dst[20+ y*stride]= color4;
862 *(uint32_t*)&dst[24+ y*stride]= color4;
863 *(uint32_t*)&dst[28+ y*stride]= color4;
866 for(y=0; y < b_h; y++){
867 *(uint32_t*)&dst[0 + y*stride]= color4;
868 *(uint32_t*)&dst[4 + y*stride]= color4;
869 *(uint32_t*)&dst[8 + y*stride]= color4;
870 *(uint32_t*)&dst[12+ y*stride]= color4;
873 for(y=0; y < b_h; y++){
874 *(uint32_t*)&dst[0 + y*stride]= color4;
875 *(uint32_t*)&dst[4 + y*stride]= color4;
878 for(y=0; y < b_h; y++){
879 *(uint32_t*)&dst[0 + y*stride]= color4;
882 for(y=0; y < b_h; y++){
883 for(x=0; x < b_w; x++){
884 dst[x + y*stride]= color;
889 uint8_t *src= s->last_picture[block->ref].data[plane_index];
890 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
891 int mx= block->mx*scale;
892 int my= block->my*scale;
895 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
896 sx += (mx>>4) - (HTAPS_MAX/2-1);
897 sy += (my>>4) - (HTAPS_MAX/2-1);
898 src += sx + sy*stride;
899 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
900 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
901 s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
904 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
905 // assert(!(b_w&(b_w-1)));
906 assert(b_w>1 && b_h>1);
907 assert((tab_index>=0 && tab_index<4) || b_w==32);
908 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
909 mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
912 for(y=0; y<b_h; y+=16){
913 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
914 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
917 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
919 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
920 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
923 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
924 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
929 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
930 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
933 for(y=0; y<b_h; y++){
934 //FIXME ugly misuse of obmc_stride
935 const uint8_t *obmc1= obmc + y*obmc_stride;
936 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
937 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
938 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
939 dst = slice_buffer_get_line(sb, src_y + y);
940 for(x=0; x<b_w; x++){
941 int v= obmc1[x] * block[3][x + y*src_stride]
942 +obmc2[x] * block[2][x + y*src_stride]
943 +obmc3[x] * block[1][x + y*src_stride]
944 +obmc4[x] * block[0][x + y*src_stride];
946 v <<= 8 - LOG2_OBMC_MAX;
952 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
953 if(v&(~255)) v= ~(v>>31);
954 dst8[x + y*src_stride] = v;
962 //FIXME name cleanup (b_w, block_w, b_width stuff)
963 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
964 const int b_width = s->b_width << s->block_max_depth;
965 const int b_height= s->b_height << s->block_max_depth;
966 const int b_stride= b_width;
967 BlockNode *lt= &s->block[b_x + b_y*b_stride];
969 BlockNode *lb= lt+b_stride;
972 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
973 uint8_t *tmp = s->scratchbuf;
980 }else if(b_x + 1 >= b_width){
987 }else if(b_y + 1 >= b_height){
992 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
995 if(!sliced && !offset_dst)
998 }else if(src_x + b_w > w){
1002 obmc -= src_y*obmc_stride;
1004 if(!sliced && !offset_dst)
1005 dst -= src_y*dst_stride;
1007 }else if(src_y + b_h> h){
1011 if(b_w<=0 || b_h<=0) return;
1013 assert(src_stride > 2*MB_SIZE + 5);
1015 if(!sliced && offset_dst)
1016 dst += src_x + src_y*dst_stride;
1017 dst8+= src_x + src_y*src_stride;
1018 // src += src_x + src_y*src_stride;
1020 ptmp= tmp + 3*tmp_step;
1023 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
1025 if(same_block(lt, rt)){
1030 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1033 if(same_block(lt, lb)){
1035 }else if(same_block(rt, lb)){
1040 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1043 if(same_block(lt, rb) ){
1045 }else if(same_block(rt, rb)){
1047 }else if(same_block(lb, rb)){
1051 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
1054 s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
1056 for(y=0; y<b_h; y++){
1057 //FIXME ugly misuse of obmc_stride
1058 const uint8_t *obmc1= obmc + y*obmc_stride;
1059 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1060 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1061 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1062 for(x=0; x<b_w; x++){
1063 int v= obmc1[x] * block[3][x + y*src_stride]
1064 +obmc2[x] * block[2][x + y*src_stride]
1065 +obmc3[x] * block[1][x + y*src_stride]
1066 +obmc4[x] * block[0][x + y*src_stride];
1068 v <<= 8 - LOG2_OBMC_MAX;
1070 v >>= 8 - FRAC_BITS;
1073 v += dst[x + y*dst_stride];
1074 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1075 if(v&(~255)) v= ~(v>>31);
1076 dst8[x + y*src_stride] = v;
1078 dst[x + y*dst_stride] -= v;
1085 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
1086 Plane *p= &s->plane[plane_index];
1087 const int mb_w= s->b_width << s->block_max_depth;
1088 const int mb_h= s->b_height << s->block_max_depth;
1090 int block_size = MB_SIZE >> s->block_max_depth;
1091 int block_w = plane_index ? block_size/2 : block_size;
1092 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1093 int obmc_stride= plane_index ? block_size : 2*block_size;
1094 int ref_stride= s->current_picture.linesize[plane_index];
1095 uint8_t *dst8= s->current_picture.data[plane_index];
1099 if(s->keyframe || (s->avctx->debug&512)){
1104 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1105 // DWTELEM * line = slice_buffer_get_line(sb, y);
1106 IDWTELEM * line = sb->line[y];
1108 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1109 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1111 if(v&(~255)) v= ~(v>>31);
1112 dst8[x + y*ref_stride]= v;
1116 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1117 // DWTELEM * line = slice_buffer_get_line(sb, y);
1118 IDWTELEM * line = sb->line[y];
1120 line[x] -= 128 << FRAC_BITS;
1121 // buf[x + y*w]-= 128<<FRAC_BITS;
1129 for(mb_x=0; mb_x<=mb_w; mb_x++){
1130 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
1131 block_w*mb_x - block_w/2,
1132 block_w*mb_y - block_w/2,
1135 w, ref_stride, obmc_stride,
1137 add, 0, plane_index);
1141 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
1142 Plane *p= &s->plane[plane_index];
1143 const int mb_w= s->b_width << s->block_max_depth;
1144 const int mb_h= s->b_height << s->block_max_depth;
1146 int block_size = MB_SIZE >> s->block_max_depth;
1147 int block_w = plane_index ? block_size/2 : block_size;
1148 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1149 const int obmc_stride= plane_index ? block_size : 2*block_size;
1150 int ref_stride= s->current_picture.linesize[plane_index];
1151 uint8_t *dst8= s->current_picture.data[plane_index];
1155 if(s->keyframe || (s->avctx->debug&512)){
1160 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1162 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1164 if(v&(~255)) v= ~(v>>31);
1165 dst8[x + y*ref_stride]= v;
1169 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1171 buf[x + y*w]-= 128<<FRAC_BITS;
1179 for(mb_x=0; mb_x<=mb_w; mb_x++){
1180 add_yblock(s, 0, NULL, buf, dst8, obmc,
1181 block_w*mb_x - block_w/2,
1182 block_w*mb_y - block_w/2,
1185 w, ref_stride, obmc_stride,
1187 add, 1, plane_index);
1191 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
1192 const int mb_h= s->b_height << s->block_max_depth;
1194 for(mb_y=0; mb_y<=mb_h; mb_y++)
1195 predict_slice(s, buf, plane_index, add, mb_y);
1198 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
1199 const int w= b->width;
1200 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1201 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1202 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1205 if(s->qlog == LOSSLESS_QLOG) return;
1207 for(y=start_y; y<end_y; y++){
1208 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1209 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1213 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1215 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
1221 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
1222 const int w= b->width;
1225 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
1229 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1231 for(y=start_y; y<end_y; y++){
1233 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1234 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1238 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
1239 else line[x] += line[x - 1];
1241 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
1242 else line[x] += line[x - 1];
1245 if(y) line[x] += prev[x];
1251 static void decode_qlogs(SnowContext *s){
1252 int plane_index, level, orientation;
1254 for(plane_index=0; plane_index<3; plane_index++){
1255 for(level=0; level<s->spatial_decomposition_count; level++){
1256 for(orientation=level ? 1:0; orientation<4; orientation++){
1258 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1259 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1260 else q= get_symbol(&s->c, s->header_state, 1);
1261 s->plane[plane_index].band[level][orientation].qlog= q;
1267 #define GET_S(dst, check) \
1268 tmp= get_symbol(&s->c, s->header_state, 0);\
1270 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
1275 static int decode_header(SnowContext *s){
1276 int plane_index, tmp;
1279 memset(kstate, MID_STATE, sizeof(kstate));
1281 s->keyframe= get_rac(&s->c, kstate);
1282 if(s->keyframe || s->always_reset){
1284 s->spatial_decomposition_type=
1288 s->block_max_depth= 0;
1291 GET_S(s->version, tmp <= 0U)
1292 s->always_reset= get_rac(&s->c, s->header_state);
1293 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1294 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1295 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1296 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1297 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1298 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
1299 s->spatial_scalability= get_rac(&s->c, s->header_state);
1300 // s->rate_scalability= get_rac(&s->c, s->header_state);
1301 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
1302 s->max_ref_frames++;
1308 if(get_rac(&s->c, s->header_state)){
1309 for(plane_index=0; plane_index<2; plane_index++){
1310 int htaps, i, sum=0;
1311 Plane *p= &s->plane[plane_index];
1312 p->diag_mc= get_rac(&s->c, s->header_state);
1313 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
1314 if((unsigned)htaps > HTAPS_MAX || htaps==0)
1317 for(i= htaps/2; i; i--){
1318 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
1319 sum += p->hcoeff[i];
1321 p->hcoeff[0]= 32-sum;
1323 s->plane[2].diag_mc= s->plane[1].diag_mc;
1324 s->plane[2].htaps = s->plane[1].htaps;
1325 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
1327 if(get_rac(&s->c, s->header_state)){
1328 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1333 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
1334 if(s->spatial_decomposition_type > 1U){
1335 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
1338 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
1339 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
1340 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
1344 s->qlog += get_symbol(&s->c, s->header_state, 1);
1345 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
1346 s->qbias += get_symbol(&s->c, s->header_state, 1);
1347 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
1348 if(s->block_max_depth > 1 || s->block_max_depth < 0){
1349 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
1350 s->block_max_depth= 0;
1357 static void init_qexp(void){
1361 for(i=0; i<QROOT; i++){
1363 v *= pow(2, 1.0 / QROOT);
1367 static av_cold int common_init(AVCodecContext *avctx){
1368 SnowContext *s = avctx->priv_data;
1373 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
1375 dsputil_init(&s->dsp, avctx);
1376 ff_dwt_init(&s->dwt);
1379 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
1380 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
1381 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
1382 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
1383 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
1384 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
1403 #define mcfh(dx,dy)\
1404 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
1405 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
1406 mc_block_hpel ## dx ## dy ## 16;\
1407 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
1408 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
1409 mc_block_hpel ## dx ## dy ## 8;
1419 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
1421 width= s->avctx->width;
1422 height= s->avctx->height;
1424 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
1425 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
1427 for(i=0; i<MAX_REF_FRAMES; i++)
1428 for(j=0; j<MAX_REF_FRAMES; j++)
1429 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
1431 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
1432 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
1437 static int common_init_after_header(AVCodecContext *avctx){
1438 SnowContext *s = avctx->priv_data;
1439 int plane_index, level, orientation;
1441 for(plane_index=0; plane_index<3; plane_index++){
1442 int w= s->avctx->width;
1443 int h= s->avctx->height;
1446 w>>= s->chroma_h_shift;
1447 h>>= s->chroma_v_shift;
1449 s->plane[plane_index].width = w;
1450 s->plane[plane_index].height= h;
1452 for(level=s->spatial_decomposition_count-1; level>=0; level--){
1453 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1454 SubBand *b= &s->plane[plane_index].band[level][orientation];
1456 b->buf= s->spatial_dwt_buffer;
1458 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
1459 b->width = (w + !(orientation&1))>>1;
1460 b->height= (h + !(orientation>1))>>1;
1462 b->stride_line = 1 << (s->spatial_decomposition_count - level);
1463 b->buf_x_offset = 0;
1464 b->buf_y_offset = 0;
1468 b->buf_x_offset = (w+1)>>1;
1471 b->buf += b->stride>>1;
1472 b->buf_y_offset = b->stride_line >> 1;
1474 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
1477 b->parent= &s->plane[plane_index].band[level-1][orientation];
1478 //FIXME avoid this realloc
1479 av_freep(&b->x_coeff);
1480 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
1495 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
1496 SubBand *b= &p->band[level][orientation];
1500 int step= 1 << (s->spatial_decomposition_count - level);
1507 //FIXME bias for nonzero ?
1509 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
1510 for(y=0; y<p->height; y++){
1511 for(x=0; x<p->width; x++){
1512 int sx= (x-xo + step/2) / step / Q2_STEP;
1513 int sy= (y-yo + step/2) / step / Q2_STEP;
1514 int v= r0[x + y*p->width] - r1[x + y*p->width];
1515 assert(sx>=0 && sy>=0 && sx < score_stride);
1517 score[sx + sy*score_stride] += v*v;
1518 assert(score[sx + sy*score_stride] >= 0);
1523 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
1524 int level, orientation;
1526 for(level=0; level<s->spatial_decomposition_count; level++){
1527 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1528 SubBand *b= &p->band[level][orientation];
1529 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
1531 dequantize(s, b, dst, b->stride);
1536 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
1537 int level, orientation, ys, xs, x, y, pass;
1538 IDWTELEM best_dequant[height * stride];
1539 IDWTELEM idwt2_buffer[height * stride];
1540 const int score_stride= (width + 10)/Q2_STEP;
1541 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1542 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1543 int threshold= (s->m.lambda * s->m.lambda) >> 6;
1545 //FIXME pass the copy cleanly ?
1547 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
1548 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
1550 for(level=0; level<s->spatial_decomposition_count; level++){
1551 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1552 SubBand *b= &p->band[level][orientation];
1553 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1554 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
1555 assert(src == b->buf); // code does not depend on this but it is true currently
1557 quantize(s, b, dst, src, b->stride, s->qbias);
1560 for(pass=0; pass<1; pass++){
1561 if(s->qbias == 0) //keyframe
1563 for(level=0; level<s->spatial_decomposition_count; level++){
1564 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1565 SubBand *b= &p->band[level][orientation];
1566 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
1567 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1569 for(ys= 0; ys<Q2_STEP; ys++){
1570 for(xs= 0; xs<Q2_STEP; xs++){
1571 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1572 dequantize_all(s, p, idwt2_buffer, width, height);
1573 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1574 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1575 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1576 for(y=ys; y<b->height; y+= Q2_STEP){
1577 for(x=xs; x<b->width; x+= Q2_STEP){
1578 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
1579 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
1580 //FIXME try more than just --
1583 dequantize_all(s, p, idwt2_buffer, width, height);
1584 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1585 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1586 for(y=ys; y<b->height; y+= Q2_STEP){
1587 for(x=xs; x<b->width; x+= Q2_STEP){
1588 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
1589 if(score[score_idx] <= best_score[score_idx] + threshold){
1590 best_score[score_idx]= score[score_idx];
1591 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
1592 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
1593 //FIXME copy instead
1602 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
1605 #endif /* QUANTIZE2==1 */
1607 #define USE_HALFPEL_PLANE 0
1609 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
1614 int w= s->avctx->width >>is_chroma;
1615 int h= s->avctx->height >>is_chroma;
1616 int ls= frame->linesize[p];
1617 uint8_t *src= frame->data[p];
1619 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1620 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1621 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1628 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
1635 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1643 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1651 static void release_buffer(AVCodecContext *avctx){
1652 SnowContext *s = avctx->priv_data;
1655 if(s->last_picture[s->max_ref_frames-1].data[0]){
1656 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
1658 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
1659 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
1663 static int frame_start(SnowContext *s){
1665 int w= s->avctx->width; //FIXME round up to x16 ?
1666 int h= s->avctx->height;
1668 if (s->current_picture.data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
1669 s->dsp.draw_edges(s->current_picture.data[0],
1670 s->current_picture.linesize[0], w , h ,
1671 EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM);
1672 s->dsp.draw_edges(s->current_picture.data[1],
1673 s->current_picture.linesize[1], w>>1, h>>1,
1674 EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
1675 s->dsp.draw_edges(s->current_picture.data[2],
1676 s->current_picture.linesize[2], w>>1, h>>1,
1677 EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
1680 release_buffer(s->avctx);
1682 tmp= s->last_picture[s->max_ref_frames-1];
1683 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
1684 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
1685 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
1686 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
1687 s->last_picture[0]= s->current_picture;
1688 s->current_picture= tmp;
1694 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
1695 if(i && s->last_picture[i-1].key_frame)
1698 if(s->ref_frames==0){
1699 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
1704 s->current_picture.reference= 1;
1705 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
1706 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
1710 s->current_picture.key_frame= s->keyframe;
1715 static av_cold void common_end(SnowContext *s){
1716 int plane_index, level, orientation, i;
1718 av_freep(&s->spatial_dwt_buffer);
1719 av_freep(&s->spatial_idwt_buffer);
1722 av_freep(&s->m.me.scratchpad);
1723 av_freep(&s->m.me.map);
1724 av_freep(&s->m.me.score_map);
1725 av_freep(&s->m.obmc_scratchpad);
1727 av_freep(&s->block);
1728 av_freep(&s->scratchbuf);
1730 for(i=0; i<MAX_REF_FRAMES; i++){
1731 av_freep(&s->ref_mvs[i]);
1732 av_freep(&s->ref_scores[i]);
1733 if(s->last_picture[i].data[0])
1734 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
1737 for(plane_index=0; plane_index<3; plane_index++){
1738 for(level=s->spatial_decomposition_count-1; level>=0; level--){
1739 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1740 SubBand *b= &s->plane[plane_index].band[level][orientation];
1742 av_freep(&b->x_coeff);
1746 if (s->mconly_picture.data[0])
1747 s->avctx->release_buffer(s->avctx, &s->mconly_picture);
1748 if (s->current_picture.data[0])
1749 s->avctx->release_buffer(s->avctx, &s->current_picture);
1752 static av_cold int decode_init(AVCodecContext *avctx)
1754 avctx->pix_fmt= PIX_FMT_YUV420P;
1761 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
1762 const uint8_t *buf = avpkt->data;
1763 int buf_size = avpkt->size;
1764 SnowContext *s = avctx->priv_data;
1765 RangeCoder * const c= &s->c;
1767 AVFrame *picture = data;
1768 int level, orientation, plane_index;
1770 ff_init_range_decoder(c, buf, buf_size);
1771 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
1773 s->current_picture.pict_type= AV_PICTURE_TYPE_I; //FIXME I vs. P
1774 if(decode_header(s)<0)
1776 common_init_after_header(avctx);
1778 // realloc slice buffer for the case that spatial_decomposition_count changed
1779 ff_slice_buffer_destroy(&s->sb);
1780 ff_slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
1782 for(plane_index=0; plane_index<3; plane_index++){
1783 Plane *p= &s->plane[plane_index];
1784 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
1785 && p->hcoeff[1]==-10
1791 if(frame_start(s) < 0)
1793 //keyframe flag duplication mess FIXME
1794 if(avctx->debug&FF_DEBUG_PICT_INFO)
1795 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
1799 for(plane_index=0; plane_index<3; plane_index++){
1800 Plane *p= &s->plane[plane_index];
1804 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
1806 if(s->avctx->debug&2048){
1807 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
1808 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
1812 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
1813 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
1819 for(level=0; level<s->spatial_decomposition_count; level++){
1820 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1821 SubBand *b= &p->band[level][orientation];
1822 unpack_coeffs(s, b, b->parent, orientation);
1828 const int mb_h= s->b_height << s->block_max_depth;
1829 const int block_size = MB_SIZE >> s->block_max_depth;
1830 const int block_w = plane_index ? block_size/2 : block_size;
1832 DWTCompose cs[MAX_DECOMPOSITIONS];
1837 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
1838 for(mb_y=0; mb_y<=mb_h; mb_y++){
1840 int slice_starty = block_w*mb_y;
1841 int slice_h = block_w*(mb_y+1);
1842 if (!(s->keyframe || s->avctx->debug&512)){
1843 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
1844 slice_h -= (block_w >> 1);
1847 for(level=0; level<s->spatial_decomposition_count; level++){
1848 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1849 SubBand *b= &p->band[level][orientation];
1852 int our_mb_start = mb_y;
1853 int our_mb_end = (mb_y + 1);
1855 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
1856 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
1857 if (!(s->keyframe || s->avctx->debug&512)){
1858 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
1859 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
1861 start_y = FFMIN(b->height, start_y);
1862 end_y = FFMIN(b->height, end_y);
1864 if (start_y != end_y){
1865 if (orientation == 0){
1866 SubBand * correlate_band = &p->band[0][0];
1867 int correlate_end_y = FFMIN(b->height, end_y + 1);
1868 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
1869 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
1870 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
1871 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
1874 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
1879 for(; yd<slice_h; yd+=4){
1880 ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
1883 if(s->qlog == LOSSLESS_QLOG){
1884 for(; yq<slice_h && yq<h; yq++){
1885 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
1887 line[x] <<= FRAC_BITS;
1892 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
1894 y = FFMIN(p->height, slice_starty);
1895 end_y = FFMIN(p->height, slice_h);
1897 ff_slice_buffer_release(&s->sb, y++);
1900 ff_slice_buffer_flush(&s->sb);
1907 release_buffer(avctx);
1909 if(!(s->avctx->debug&2048))
1910 *picture= s->current_picture;
1912 *picture= s->mconly_picture;
1914 *data_size = sizeof(AVFrame);
1916 bytes_read= c->bytestream - c->bytestream_start;
1917 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
1922 static av_cold int decode_end(AVCodecContext *avctx)
1924 SnowContext *s = avctx->priv_data;
1926 ff_slice_buffer_destroy(&s->sb);
1933 AVCodec ff_snow_decoder = {
1935 .type = AVMEDIA_TYPE_VIDEO,
1936 .id = CODEC_ID_SNOW,
1937 .priv_data_size = sizeof(SnowContext),
1938 .init = decode_init,
1939 .close = decode_end,
1940 .decode = decode_frame,
1941 .capabilities = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
1942 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
1945 #if CONFIG_SNOW_ENCODER
1946 static av_cold int encode_init(AVCodecContext *avctx)
1948 SnowContext *s = avctx->priv_data;
1951 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
1952 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
1953 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
1957 if(avctx->prediction_method == DWT_97
1958 && (avctx->flags & CODEC_FLAG_QSCALE)
1959 && avctx->global_quality == 0){
1960 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
1964 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
1966 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
1967 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
1969 for(plane_index=0; plane_index<3; plane_index++){
1970 s->plane[plane_index].diag_mc= 1;
1971 s->plane[plane_index].htaps= 6;
1972 s->plane[plane_index].hcoeff[0]= 40;
1973 s->plane[plane_index].hcoeff[1]= -10;
1974 s->plane[plane_index].hcoeff[2]= 2;
1975 s->plane[plane_index].fast_mc= 1;
1984 s->m.flags = avctx->flags;
1985 s->m.bit_rate= avctx->bit_rate;
1988 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
1989 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1990 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1991 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
1992 h263_encode_init(&s->m); //mv_penalty
1994 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
1996 if(avctx->flags&CODEC_FLAG_PASS1){
1997 if(!avctx->stats_out)
1998 avctx->stats_out = av_mallocz(256);
2000 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2001 if(ff_rate_control_init(&s->m) < 0)
2004 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2006 avctx->coded_frame= &s->current_picture;
2007 switch(avctx->pix_fmt){
2008 // case PIX_FMT_YUV444P:
2009 // case PIX_FMT_YUV422P:
2010 case PIX_FMT_YUV420P:
2012 // case PIX_FMT_YUV411P:
2013 // case PIX_FMT_YUV410P:
2014 s->colorspace_type= 0;
2016 /* case PIX_FMT_RGB32:
2020 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
2023 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2024 s->chroma_h_shift= 1;
2025 s->chroma_v_shift= 1;
2027 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
2028 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
2030 s->avctx->get_buffer(s->avctx, &s->input_picture);
2032 if(s->avctx->me_method == ME_ITER){
2034 int size= s->b_width * s->b_height << 2*s->block_max_depth;
2035 for(i=0; i<s->max_ref_frames; i++){
2036 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
2037 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
2044 //near copy & paste from dsputil, FIXME
2045 static int pix_sum(uint8_t * pix, int line_size, int w)
2050 for (i = 0; i < w; i++) {
2051 for (j = 0; j < w; j++) {
2055 pix += line_size - w;
2060 //near copy & paste from dsputil, FIXME
2061 static int pix_norm1(uint8_t * pix, int line_size, int w)
2064 uint32_t *sq = ff_squareTbl + 256;
2067 for (i = 0; i < w; i++) {
2068 for (j = 0; j < w; j ++) {
2072 pix += line_size - w;
2080 #define P_TOPRIGHT P[3]
2081 #define P_MEDIAN P[4]
2083 #define FLAG_QPEL 1 //must be 1
2085 static int encode_q_branch(SnowContext *s, int level, int x, int y){
2086 uint8_t p_buffer[1024];
2087 uint8_t i_buffer[1024];
2088 uint8_t p_state[sizeof(s->block_state)];
2089 uint8_t i_state[sizeof(s->block_state)];
2091 uint8_t *pbbak= s->c.bytestream;
2092 uint8_t *pbbak_start= s->c.bytestream_start;
2093 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
2094 const int w= s->b_width << s->block_max_depth;
2095 const int h= s->b_height << s->block_max_depth;
2096 const int rem_depth= s->block_max_depth - level;
2097 const int index= (x + y*w) << rem_depth;
2098 const int block_w= 1<<(LOG2_MB_SIZE - level);
2099 int trx= (x+1)<<rem_depth;
2100 int try= (y+1)<<rem_depth;
2101 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2102 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2103 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2104 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2105 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2106 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2107 int pl = left->color[0];
2108 int pcb= left->color[1];
2109 int pcr= left->color[2];
2113 const int stride= s->current_picture.linesize[0];
2114 const int uvstride= s->current_picture.linesize[1];
2115 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2116 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2117 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2119 int16_t last_mv[3][2];
2120 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2121 const int shift= 1+qpel;
2122 MotionEstContext *c= &s->m.me;
2123 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2124 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2125 int my_context= av_log2(2*FFABS(left->my - top->my));
2126 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2127 int ref, best_ref, ref_score, ref_mx, ref_my;
2129 assert(sizeof(s->block_state) >= 256);
2131 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2135 // clip predictors / edge ?
2137 P_LEFT[0]= left->mx;
2138 P_LEFT[1]= left->my;
2141 P_TOPRIGHT[0]= tr->mx;
2142 P_TOPRIGHT[1]= tr->my;
2144 last_mv[0][0]= s->block[index].mx;
2145 last_mv[0][1]= s->block[index].my;
2146 last_mv[1][0]= right->mx;
2147 last_mv[1][1]= right->my;
2148 last_mv[2][0]= bottom->mx;
2149 last_mv[2][1]= bottom->my;
2156 assert(c-> stride == stride);
2157 assert(c->uvstride == uvstride);
2159 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2160 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2161 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2162 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2164 c->xmin = - x*block_w - 16+3;
2165 c->ymin = - y*block_w - 16+3;
2166 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2167 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2169 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2170 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2171 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2172 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2173 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2174 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2175 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2177 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2178 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2181 c->pred_x= P_LEFT[0];
2182 c->pred_y= P_LEFT[1];
2184 c->pred_x = P_MEDIAN[0];
2185 c->pred_y = P_MEDIAN[1];
2190 for(ref=0; ref<s->ref_frames; ref++){
2191 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2193 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2194 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2196 assert(ref_mx >= c->xmin);
2197 assert(ref_mx <= c->xmax);
2198 assert(ref_my >= c->ymin);
2199 assert(ref_my <= c->ymax);
2201 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2202 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2203 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2204 if(s->ref_mvs[ref]){
2205 s->ref_mvs[ref][index][0]= ref_mx;
2206 s->ref_mvs[ref][index][1]= ref_my;
2207 s->ref_scores[ref][index]= ref_score;
2209 if(score > ref_score){
2216 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
2219 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
2221 pc.bytestream_start=
2222 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2223 memcpy(p_state, s->block_state, sizeof(s->block_state));
2225 if(level!=s->block_max_depth)
2226 put_rac(&pc, &p_state[4 + s_context], 1);
2227 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2228 if(s->ref_frames > 1)
2229 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2230 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2231 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2232 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2233 p_len= pc.bytestream - pc.bytestream_start;
2234 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
2236 block_s= block_w*block_w;
2237 sum = pix_sum(current_data[0], stride, block_w);
2238 l= (sum + block_s/2)/block_s;
2239 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2241 block_s= block_w*block_w>>2;
2242 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2243 cb= (sum + block_s/2)/block_s;
2244 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2245 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2246 cr= (sum + block_s/2)/block_s;
2247 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2250 ic.bytestream_start=
2251 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2252 memcpy(i_state, s->block_state, sizeof(s->block_state));
2253 if(level!=s->block_max_depth)
2254 put_rac(&ic, &i_state[4 + s_context], 1);
2255 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2256 put_symbol(&ic, &i_state[32], l-pl , 1);
2257 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2258 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2259 i_len= ic.bytestream - ic.bytestream_start;
2260 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
2262 // assert(score==256*256*256*64-1);
2263 assert(iscore < 255*255*256 + s->lambda2*10);
2264 assert(iscore >= 0);
2265 assert(l>=0 && l<=255);
2266 assert(pl>=0 && pl<=255);
2269 int varc= iscore >> 8;
2270 int vard= score >> 8;
2271 if (vard <= 64 || vard < varc)
2272 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2274 c->scene_change_score+= s->m.qscale;
2277 if(level!=s->block_max_depth){
2278 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2279 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2280 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2281 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2282 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2283 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2285 if(score2 < score && score2 < iscore)
2290 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2291 memcpy(pbbak, i_buffer, i_len);
2293 s->c.bytestream_start= pbbak_start;
2294 s->c.bytestream= pbbak + i_len;
2295 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2296 memcpy(s->block_state, i_state, sizeof(s->block_state));
2299 memcpy(pbbak, p_buffer, p_len);
2301 s->c.bytestream_start= pbbak_start;
2302 s->c.bytestream= pbbak + p_len;
2303 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2304 memcpy(s->block_state, p_state, sizeof(s->block_state));
2309 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2310 const int w= s->b_width << s->block_max_depth;
2311 const int rem_depth= s->block_max_depth - level;
2312 const int index= (x + y*w) << rem_depth;
2313 int trx= (x+1)<<rem_depth;
2314 BlockNode *b= &s->block[index];
2315 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2316 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2317 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2318 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2319 int pl = left->color[0];
2320 int pcb= left->color[1];
2321 int pcr= left->color[2];
2323 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2324 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2325 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2326 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2329 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2333 if(level!=s->block_max_depth){
2334 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2335 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2337 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2338 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2339 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2340 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2341 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2345 if(b->type & BLOCK_INTRA){
2346 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2347 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2348 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2349 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2350 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2351 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2353 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2354 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2355 if(s->ref_frames > 1)
2356 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2357 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2358 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2359 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2363 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2365 Plane *p= &s->plane[plane_index];
2366 const int block_size = MB_SIZE >> s->block_max_depth;
2367 const int block_w = plane_index ? block_size/2 : block_size;
2368 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2369 const int obmc_stride= plane_index ? block_size : 2*block_size;
2370 const int ref_stride= s->current_picture.linesize[plane_index];
2371 uint8_t *src= s-> input_picture.data[plane_index];
2372 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2373 const int b_stride = s->b_width << s->block_max_depth;
2374 const int w= p->width;
2375 const int h= p->height;
2376 int index= mb_x + mb_y*b_stride;
2377 BlockNode *b= &s->block[index];
2378 BlockNode backup= *b;
2382 b->type|= BLOCK_INTRA;
2383 b->color[plane_index]= 0;
2384 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2387 int mb_x2= mb_x + (i &1) - 1;
2388 int mb_y2= mb_y + (i>>1) - 1;
2389 int x= block_w*mb_x2 + block_w/2;
2390 int y= block_w*mb_y2 + block_w/2;
2392 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2393 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2395 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2396 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2397 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2398 int obmc_v= obmc[index];
2400 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2401 if(x<0) obmc_v += obmc[index + block_w];
2402 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2403 if(x+block_w>w) obmc_v += obmc[index - block_w];
2404 //FIXME precalculate this or simplify it somehow else
2406 d = -dst[index] + (1<<(FRAC_BITS-1));
2408 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2409 aa += obmc_v * obmc_v; //FIXME precalculate this
2415 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2418 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2419 const int b_stride = s->b_width << s->block_max_depth;
2420 const int b_height = s->b_height<< s->block_max_depth;
2421 int index= x + y*b_stride;
2422 const BlockNode *b = &s->block[index];
2423 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2424 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2425 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2426 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2428 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2429 // int my_context= av_log2(2*FFABS(left->my - top->my));
2431 if(x<0 || x>=b_stride || y>=b_height)
2438 00001XXXX 15-30 8-15
2440 //FIXME try accurate rate
2441 //FIXME intra and inter predictors if surrounding blocks are not the same type
2442 if(b->type & BLOCK_INTRA){
2443 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2444 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2445 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2447 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2450 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2451 + av_log2(2*FFABS(dmy))
2452 + av_log2(2*b->ref));
2456 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2457 Plane *p= &s->plane[plane_index];
2458 const int block_size = MB_SIZE >> s->block_max_depth;
2459 const int block_w = plane_index ? block_size/2 : block_size;
2460 const int obmc_stride= plane_index ? block_size : 2*block_size;
2461 const int ref_stride= s->current_picture.linesize[plane_index];
2462 uint8_t *dst= s->current_picture.data[plane_index];
2463 uint8_t *src= s-> input_picture.data[plane_index];
2464 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2465 uint8_t *cur = s->scratchbuf;
2466 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2467 const int b_stride = s->b_width << s->block_max_depth;
2468 const int b_height = s->b_height<< s->block_max_depth;
2469 const int w= p->width;
2470 const int h= p->height;
2473 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2474 int sx= block_w*mb_x - block_w/2;
2475 int sy= block_w*mb_y - block_w/2;
2476 int x0= FFMAX(0,-sx);
2477 int y0= FFMAX(0,-sy);
2478 int x1= FFMIN(block_w*2, w-sx);
2479 int y1= FFMIN(block_w*2, h-sy);
2482 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2484 for(y=y0; y<y1; y++){
2485 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2486 const IDWTELEM *pred1 = pred + y*obmc_stride;
2487 uint8_t *cur1 = cur + y*ref_stride;
2488 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2489 for(x=x0; x<x1; x++){
2490 #if FRAC_BITS >= LOG2_OBMC_MAX
2491 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2493 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2495 v = (v + pred1[x]) >> FRAC_BITS;
2496 if(v&(~255)) v= ~(v>>31);
2501 /* copy the regions where obmc[] = (uint8_t)256 */
2502 if(LOG2_OBMC_MAX == 8
2503 && (mb_x == 0 || mb_x == b_stride-1)
2504 && (mb_y == 0 || mb_y == b_height-1)){
2513 for(y=y0; y<y1; y++)
2514 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2518 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2519 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2520 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2521 * So improving the score of one block is not strictly guaranteed
2522 * to improve the score of the whole frame, thus iterative motion
2523 * estimation does not always converge. */
2524 if(s->avctx->me_cmp == FF_CMP_W97)
2525 distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2526 else if(s->avctx->me_cmp == FF_CMP_W53)
2527 distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2531 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2532 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2537 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2546 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2548 if(mb_x == b_stride-2)
2549 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2551 return distortion + rate*penalty_factor;
2554 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2556 Plane *p= &s->plane[plane_index];
2557 const int block_size = MB_SIZE >> s->block_max_depth;
2558 const int block_w = plane_index ? block_size/2 : block_size;
2559 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2560 const int obmc_stride= plane_index ? block_size : 2*block_size;
2561 const int ref_stride= s->current_picture.linesize[plane_index];
2562 uint8_t *dst= s->current_picture.data[plane_index];
2563 uint8_t *src= s-> input_picture.data[plane_index];
2564 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2565 // const has only been removed from zero_dst to suppress a warning
2566 static IDWTELEM zero_dst[4096]; //FIXME
2567 const int b_stride = s->b_width << s->block_max_depth;
2568 const int w= p->width;
2569 const int h= p->height;
2572 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2575 int mb_x2= mb_x + (i%3) - 1;
2576 int mb_y2= mb_y + (i/3) - 1;
2577 int x= block_w*mb_x2 + block_w/2;
2578 int y= block_w*mb_y2 + block_w/2;
2580 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2581 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2583 //FIXME find a cleaner/simpler way to skip the outside stuff
2584 for(y2= y; y2<0; y2++)
2585 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2586 for(y2= h; y2<y+block_w; y2++)
2587 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2589 for(y2= y; y2<y+block_w; y2++)
2590 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2593 for(y2= y; y2<y+block_w; y2++)
2594 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2597 assert(block_w== 8 || block_w==16);
2598 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2602 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2603 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2611 rate = get_block_bits(s, mb_x, mb_y, 2);
2612 for(i=merged?4:0; i<9; i++){
2613 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2614 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2617 return distortion + rate*penalty_factor;
2620 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2621 const int w= b->width;
2622 const int h= b->height;
2634 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2635 v= src[x + y*stride];
2638 t= src[x + (y-1)*stride];
2640 lt= src[x - 1 + (y-1)*stride];
2643 rt= src[x + 1 + (y-1)*stride];
2647 l= src[x - 1 + y*stride];
2649 if(orientation==1) ll= src[y + (x-2)*stride];
2650 else ll= src[x - 2 + y*stride];
2656 if(px<b->parent->width && py<b->parent->height)
2657 p= parent[px + py*2*stride];
2659 if(!(/*ll|*/l|lt|t|rt|p)){
2661 runs[run_index++]= run;
2669 max_index= run_index;
2670 runs[run_index++]= run;
2672 run= runs[run_index++];
2674 put_symbol2(&s->c, b->state[30], max_index, 0);
2675 if(run_index <= max_index)
2676 put_symbol2(&s->c, b->state[1], run, 3);
2679 if(s->c.bytestream_end - s->c.bytestream < w*40){
2680 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2685 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2686 v= src[x + y*stride];
2689 t= src[x + (y-1)*stride];
2691 lt= src[x - 1 + (y-1)*stride];
2694 rt= src[x + 1 + (y-1)*stride];
2698 l= src[x - 1 + y*stride];
2700 if(orientation==1) ll= src[y + (x-2)*stride];
2701 else ll= src[x - 2 + y*stride];
2707 if(px<b->parent->width && py<b->parent->height)
2708 p= parent[px + py*2*stride];
2710 if(/*ll|*/l|lt|t|rt|p){
2711 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
2713 put_rac(&s->c, &b->state[0][context], !!v);
2716 run= runs[run_index++];
2718 if(run_index <= max_index)
2719 put_symbol2(&s->c, b->state[1], run, 3);
2727 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
2728 int l2= 2*FFABS(l) + (l<0);
2729 int t2= 2*FFABS(t) + (t<0);
2731 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
2732 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
2740 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2741 // encode_subband_qtree(s, b, src, parent, stride, orientation);
2742 // encode_subband_z0run(s, b, src, parent, stride, orientation);
2743 return encode_subband_c0run(s, b, src, parent, stride, orientation);
2744 // encode_subband_dzr(s, b, src, parent, stride, orientation);
2747 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2748 const int b_stride= s->b_width << s->block_max_depth;
2749 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2750 BlockNode backup= *block;
2751 int rd, index, value;
2753 assert(mb_x>=0 && mb_y>=0);
2754 assert(mb_x<b_stride);
2757 block->color[0] = p[0];
2758 block->color[1] = p[1];
2759 block->color[2] = p[2];
2760 block->type |= BLOCK_INTRA;
2762 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2763 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2764 if(s->me_cache[index] == value)
2766 s->me_cache[index]= value;
2770 block->type &= ~BLOCK_INTRA;
2773 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2785 /* special case for int[2] args we discard afterwards,
2786 * fixes compilation problem with gcc 2.95 */
2787 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2788 int p[2] = {p0, p1};
2789 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2792 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2793 const int b_stride= s->b_width << s->block_max_depth;
2794 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2795 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2796 int rd, index, value;
2798 assert(mb_x>=0 && mb_y>=0);
2799 assert(mb_x<b_stride);
2800 assert(((mb_x|mb_y)&1) == 0);
2802 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
2803 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
2804 if(s->me_cache[index] == value)
2806 s->me_cache[index]= value;
2811 block->type &= ~BLOCK_INTRA;
2812 block[1]= block[b_stride]= block[b_stride+1]= *block;
2814 rd= get_4block_rd(s, mb_x, mb_y, 0);
2821 block[0]= backup[0];
2822 block[1]= backup[1];
2823 block[b_stride]= backup[2];
2824 block[b_stride+1]= backup[3];
2829 static void iterative_me(SnowContext *s){
2830 int pass, mb_x, mb_y;
2831 const int b_width = s->b_width << s->block_max_depth;
2832 const int b_height= s->b_height << s->block_max_depth;
2833 const int b_stride= b_width;
2837 RangeCoder r = s->c;
2838 uint8_t state[sizeof(s->block_state)];
2839 memcpy(state, s->block_state, sizeof(s->block_state));
2840 for(mb_y= 0; mb_y<s->b_height; mb_y++)
2841 for(mb_x= 0; mb_x<s->b_width; mb_x++)
2842 encode_q_branch(s, 0, mb_x, mb_y);
2844 memcpy(s->block_state, state, sizeof(s->block_state));
2847 for(pass=0; pass<25; pass++){
2850 for(mb_y= 0; mb_y<b_height; mb_y++){
2851 for(mb_x= 0; mb_x<b_width; mb_x++){
2852 int dia_change, i, j, ref;
2853 int best_rd= INT_MAX, ref_rd;
2854 BlockNode backup, ref_b;
2855 const int index= mb_x + mb_y * b_stride;
2856 BlockNode *block= &s->block[index];
2857 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
2858 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
2859 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
2860 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
2861 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
2862 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
2863 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
2864 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
2865 const int b_w= (MB_SIZE >> s->block_max_depth);
2866 uint8_t obmc_edged[b_w*2][b_w*2];
2868 if(pass && (block->type & BLOCK_OPT))
2870 block->type |= BLOCK_OPT;
2874 if(!s->me_cache_generation)
2875 memset(s->me_cache, 0, sizeof(s->me_cache));
2876 s->me_cache_generation += 1<<22;
2878 //FIXME precalculate
2881 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
2883 for(y=0; y<b_w*2; y++)
2884 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
2885 if(mb_x==b_stride-1)
2886 for(y=0; y<b_w*2; y++)
2887 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
2889 for(x=0; x<b_w*2; x++)
2890 obmc_edged[0][x] += obmc_edged[b_w-1][x];
2891 for(y=1; y<b_w; y++)
2892 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
2894 if(mb_y==b_height-1){
2895 for(x=0; x<b_w*2; x++)
2896 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
2897 for(y=b_w; y<b_w*2-1; y++)
2898 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
2902 //skip stuff outside the picture
2903 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
2904 uint8_t *src= s-> input_picture.data[0];
2905 uint8_t *dst= s->current_picture.data[0];
2906 const int stride= s->current_picture.linesize[0];
2907 const int block_w= MB_SIZE >> s->block_max_depth;
2908 const int sx= block_w*mb_x - block_w/2;
2909 const int sy= block_w*mb_y - block_w/2;
2910 const int w= s->plane[0].width;
2911 const int h= s->plane[0].height;
2915 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
2916 for(y=h; y<sy+block_w*2; y++)
2917 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
2919 for(y=sy; y<sy+block_w*2; y++)
2920 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
2922 if(sx+block_w*2 > w){
2923 for(y=sy; y<sy+block_w*2; y++)
2924 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
2928 // intra(black) = neighbors' contribution to the current block
2930 color[i]= get_dc(s, mb_x, mb_y, i);
2932 // get previous score (cannot be cached due to OBMC)
2933 if(pass > 0 && (block->type&BLOCK_INTRA)){
2934 int color0[3]= {block->color[0], block->color[1], block->color[2]};
2935 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
2937 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
2941 for(ref=0; ref < s->ref_frames; ref++){
2942 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
2943 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
2948 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
2949 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
2951 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
2953 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
2955 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
2957 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
2960 //FIXME avoid subpel interpolation / round to nearest integer
2963 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
2965 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
2966 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
2967 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
2968 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
2974 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
2977 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
2979 //FIXME or try the standard 2 pass qpel or similar
2981 mvr[0][0]= block->mx;
2982 mvr[0][1]= block->my;
2983 if(ref_rd > best_rd){
2990 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
2991 //FIXME RD style color selection
2992 if(!same_block(block, &backup)){
2993 if(tb ) tb ->type &= ~BLOCK_OPT;
2994 if(lb ) lb ->type &= ~BLOCK_OPT;
2995 if(rb ) rb ->type &= ~BLOCK_OPT;
2996 if(bb ) bb ->type &= ~BLOCK_OPT;
2997 if(tlb) tlb->type &= ~BLOCK_OPT;
2998 if(trb) trb->type &= ~BLOCK_OPT;
2999 if(blb) blb->type &= ~BLOCK_OPT;
3000 if(brb) brb->type &= ~BLOCK_OPT;
3005 av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3010 if(s->block_max_depth == 1){
3012 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3013 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3015 int best_rd, init_rd;
3016 const int index= mb_x + mb_y * b_stride;
3019 b[0]= &s->block[index];
3021 b[2]= b[0]+b_stride;
3023 if(same_block(b[0], b[1]) &&
3024 same_block(b[0], b[2]) &&
3025 same_block(b[0], b[3]))
3028 if(!s->me_cache_generation)
3029 memset(s->me_cache, 0, sizeof(s->me_cache));
3030 s->me_cache_generation += 1<<22;
3032 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3034 //FIXME more multiref search?
3035 check_4block_inter(s, mb_x, mb_y,
3036 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3037 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3040 if(!(b[i]->type&BLOCK_INTRA))
3041 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3043 if(init_rd != best_rd)
3047 av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3051 static void encode_blocks(SnowContext *s, int search){
3056 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
3060 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
3061 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
3065 if(s->avctx->me_method == ME_ITER || !search)
3066 encode_q_branch2(s, 0, x, y);
3068 encode_q_branch (s, 0, x, y);
3073 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3074 const int w= b->width;
3075 const int h= b->height;
3076 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3077 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3078 int x,y, thres1, thres2;
3080 if(s->qlog == LOSSLESS_QLOG){
3083 dst[x + y*stride]= src[x + y*stride];
3087 bias= bias ? 0 : (3*qmul)>>3;
3088 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3094 int i= src[x + y*stride];
3096 if((unsigned)(i+thres1) > thres2){
3099 i/= qmul; //FIXME optimize
3100 dst[x + y*stride]= i;
3104 i/= qmul; //FIXME optimize
3105 dst[x + y*stride]= -i;
3108 dst[x + y*stride]= 0;
3114 int i= src[x + y*stride];
3116 if((unsigned)(i+thres1) > thres2){
3119 i= (i + bias) / qmul; //FIXME optimize
3120 dst[x + y*stride]= i;
3124 i= (i + bias) / qmul; //FIXME optimize
3125 dst[x + y*stride]= -i;
3128 dst[x + y*stride]= 0;
3134 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3135 const int w= b->width;
3136 const int h= b->height;
3137 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3138 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3139 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3142 if(s->qlog == LOSSLESS_QLOG) return;
3146 int i= src[x + y*stride];
3148 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3150 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3156 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3157 const int w= b->width;
3158 const int h= b->height;
3161 for(y=h-1; y>=0; y--){
3162 for(x=w-1; x>=0; x--){
3163 int i= x + y*stride;
3167 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3168 else src[i] -= src[i - 1];
3170 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3171 else src[i] -= src[i - 1];
3174 if(y) src[i] -= src[i - stride];
3180 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3181 const int w= b->width;
3182 const int h= b->height;
3187 int i= x + y*stride;
3191 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3192 else src[i] += src[i - 1];
3194 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3195 else src[i] += src[i - 1];
3198 if(y) src[i] += src[i - stride];
3204 static void encode_qlogs(SnowContext *s){
3205 int plane_index, level, orientation;
3207 for(plane_index=0; plane_index<2; plane_index++){
3208 for(level=0; level<s->spatial_decomposition_count; level++){
3209 for(orientation=level ? 1:0; orientation<4; orientation++){
3210 if(orientation==2) continue;
3211 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3217 static void encode_header(SnowContext *s){
3221 memset(kstate, MID_STATE, sizeof(kstate));
3223 put_rac(&s->c, kstate, s->keyframe);
3224 if(s->keyframe || s->always_reset){
3226 s->last_spatial_decomposition_type=
3230 s->last_block_max_depth= 0;
3231 for(plane_index=0; plane_index<2; plane_index++){
3232 Plane *p= &s->plane[plane_index];
3235 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3239 put_symbol(&s->c, s->header_state, s->version, 0);
3240 put_rac(&s->c, s->header_state, s->always_reset);
3241 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3242 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3243 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3244 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3245 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3246 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3247 put_rac(&s->c, s->header_state, s->spatial_scalability);
3248 // put_rac(&s->c, s->header_state, s->rate_scalability);
3249 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3256 for(plane_index=0; plane_index<2; plane_index++){
3257 Plane *p= &s->plane[plane_index];
3258 update_mc |= p->last_htaps != p->htaps;
3259 update_mc |= p->last_diag_mc != p->diag_mc;
3260 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3262 put_rac(&s->c, s->header_state, update_mc);
3264 for(plane_index=0; plane_index<2; plane_index++){
3265 Plane *p= &s->plane[plane_index];
3266 put_rac(&s->c, s->header_state, p->diag_mc);
3267 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3268 for(i= p->htaps/2; i; i--)
3269 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3272 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3273 put_rac(&s->c, s->header_state, 1);
3274 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3277 put_rac(&s->c, s->header_state, 0);
3280 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3281 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3282 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3283 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3284 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3288 static void update_last_header_values(SnowContext *s){
3292 for(plane_index=0; plane_index<2; plane_index++){
3293 Plane *p= &s->plane[plane_index];
3294 p->last_diag_mc= p->diag_mc;
3295 p->last_htaps = p->htaps;
3296 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3300 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
3301 s->last_qlog = s->qlog;
3302 s->last_qbias = s->qbias;
3303 s->last_mv_scale = s->mv_scale;
3304 s->last_block_max_depth = s->block_max_depth;
3305 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3308 static int qscale2qlog(int qscale){
3309 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3310 + 61*QROOT/8; ///< 64 > 60
3313 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3315 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3316 * FIXME we know exact mv bits at this point,
3317 * but ratecontrol isn't set up to include them. */
3318 uint32_t coef_sum= 0;
3319 int level, orientation, delta_qlog;
3321 for(level=0; level<s->spatial_decomposition_count; level++){
3322 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3323 SubBand *b= &s->plane[0].band[level][orientation];
3324 IDWTELEM *buf= b->ibuf;
3325 const int w= b->width;
3326 const int h= b->height;
3327 const int stride= b->stride;
3328 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3329 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3330 const int qdiv= (1<<16)/qmul;
3332 //FIXME this is ugly
3335 buf[x+y*stride]= b->buf[x+y*stride];
3337 decorrelate(s, b, buf, stride, 1, 0);
3340 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3344 /* ugly, ratecontrol just takes a sqrt again */
3345 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3346 assert(coef_sum < INT_MAX);
3348 if(pict->pict_type == AV_PICTURE_TYPE_I){
3349 s->m.current_picture.mb_var_sum= coef_sum;
3350 s->m.current_picture.mc_mb_var_sum= 0;
3352 s->m.current_picture.mc_mb_var_sum= coef_sum;
3353 s->m.current_picture.mb_var_sum= 0;
3356 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3357 if (pict->quality < 0)
3359 s->lambda= pict->quality * 3/2;
3360 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3361 s->qlog+= delta_qlog;
3365 static void calculate_visual_weight(SnowContext *s, Plane *p){
3366 int width = p->width;
3367 int height= p->height;
3368 int level, orientation, x, y;
3370 for(level=0; level<s->spatial_decomposition_count; level++){
3371 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3372 SubBand *b= &p->band[level][orientation];
3373 IDWTELEM *ibuf= b->ibuf;
3376 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3377 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3378 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3379 for(y=0; y<height; y++){
3380 for(x=0; x<width; x++){
3381 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3386 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3391 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3392 SnowContext *s = avctx->priv_data;
3393 RangeCoder * const c= &s->c;
3394 AVFrame *pict = data;
3395 const int width= s->avctx->width;
3396 const int height= s->avctx->height;
3397 int level, orientation, plane_index, i, y;
3398 uint8_t rc_header_bak[sizeof(s->header_state)];
3399 uint8_t rc_block_bak[sizeof(s->block_state)];
3401 ff_init_range_encoder(c, buf, buf_size);
3402 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3406 for(y=0; y<(height>>shift); y++)
3407 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3408 &pict->data[i][y * pict->linesize[i]],
3411 s->new_picture = *pict;
3413 s->m.picture_number= avctx->frame_number;
3414 if(avctx->flags&CODEC_FLAG_PASS2){
3416 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3417 s->keyframe= pict->pict_type==AV_PICTURE_TYPE_I;
3418 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
3419 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3420 if (pict->quality < 0)
3424 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3426 pict->pict_type= s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3429 if(s->pass1_rc && avctx->frame_number == 0)
3430 pict->quality= 2*FF_QP2LAMBDA;
3432 s->qlog= qscale2qlog(pict->quality);
3433 s->lambda = pict->quality * 3/2;
3435 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
3436 s->qlog= LOSSLESS_QLOG;
3438 }//else keep previous frame's qlog until after motion estimation
3442 s->m.current_picture_ptr= &s->m.current_picture;
3443 s->m.last_picture.f.pts = s->m.current_picture.f.pts;
3444 s->m.current_picture.f.pts = pict->pts;
3445 if(pict->pict_type == AV_PICTURE_TYPE_P){
3446 int block_width = (width +15)>>4;
3447 int block_height= (height+15)>>4;
3448 int stride= s->current_picture.linesize[0];
3450 assert(s->current_picture.data[0]);
3451 assert(s->last_picture[0].data[0]);
3453 s->m.avctx= s->avctx;
3454 s->m.current_picture.f.data[0] = s->current_picture.data[0];
3455 s->m. last_picture.f.data[0] = s->last_picture[0].data[0];
3456 s->m. new_picture.f.data[0] = s-> input_picture.data[0];
3457 s->m. last_picture_ptr= &s->m. last_picture;
3459 s->m. last_picture.f.linesize[0] =
3460 s->m. new_picture.f.linesize[0] =
3461 s->m.current_picture.f.linesize[0] = stride;
3462 s->m.uvlinesize= s->current_picture.linesize[1];
3464 s->m.height= height;
3465 s->m.mb_width = block_width;
3466 s->m.mb_height= block_height;
3467 s->m.mb_stride= s->m.mb_width+1;
3468 s->m.b8_stride= 2*s->m.mb_width+1;
3470 s->m.pict_type= pict->pict_type;
3471 s->m.me_method= s->avctx->me_method;
3472 s->m.me.scene_change_score=0;
3473 s->m.flags= s->avctx->flags;
3474 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3475 s->m.out_format= FMT_H263;
3476 s->m.unrestricted_mv= 1;
3478 s->m.lambda = s->lambda;
3479 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3480 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3482 s->m.dsp= s->dsp; //move
3488 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
3489 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
3494 if(pict->pict_type == AV_PICTURE_TYPE_I)
3495 s->spatial_decomposition_count= 5;
3497 s->spatial_decomposition_count= 5;
3499 s->m.pict_type = pict->pict_type;
3500 s->qbias= pict->pict_type == AV_PICTURE_TYPE_P ? 2 : 0;
3502 common_init_after_header(avctx);
3504 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3505 for(plane_index=0; plane_index<3; plane_index++){
3506 calculate_visual_weight(s, &s->plane[plane_index]);
3511 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3512 encode_blocks(s, 1);
3513 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3515 for(plane_index=0; plane_index<3; plane_index++){
3516 Plane *p= &s->plane[plane_index];
3520 // int bits= put_bits_count(&s->c.pb);
3522 if (!s->memc_only) {
3524 if(pict->data[plane_index]) //FIXME gray hack
3527 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3530 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
3533 && pict->pict_type == AV_PICTURE_TYPE_P
3534 && !(avctx->flags&CODEC_FLAG_PASS2)
3535 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3536 ff_init_range_encoder(c, buf, buf_size);
3537 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3538 pict->pict_type= AV_PICTURE_TYPE_I;
3540 s->current_picture.key_frame=1;
3544 if(s->qlog == LOSSLESS_QLOG){
3547 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3553 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
3559 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
3561 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3563 if(s->pass1_rc && plane_index==0){
3564 int delta_qlog = ratecontrol_1pass(s, pict);
3565 if (delta_qlog <= INT_MIN)
3568 //reordering qlog in the bitstream would eliminate this reset
3569 ff_init_range_encoder(c, buf, buf_size);
3570 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
3571 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
3573 encode_blocks(s, 0);
3577 for(level=0; level<s->spatial_decomposition_count; level++){
3578 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3579 SubBand *b= &p->band[level][orientation];
3582 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
3584 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == AV_PICTURE_TYPE_P, 0);
3585 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
3586 assert(b->parent==NULL || b->parent->stride == b->stride*2);
3588 correlate(s, b, b->ibuf, b->stride, 1, 0);
3592 for(level=0; level<s->spatial_decomposition_count; level++){
3593 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3594 SubBand *b= &p->band[level][orientation];
3596 dequantize(s, b, b->ibuf, b->stride);
3600 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3601 if(s->qlog == LOSSLESS_QLOG){
3604 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
3608 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3611 if(pict->pict_type == AV_PICTURE_TYPE_I){
3614 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
3615 pict->data[plane_index][y*pict->linesize[plane_index] + x];
3619 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
3620 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3623 if(s->avctx->flags&CODEC_FLAG_PSNR){
3626 if(pict->data[plane_index]) //FIXME gray hack
3629 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3633 s->avctx->error[plane_index] += error;
3634 s->current_picture.error[plane_index] = error;
3639 update_last_header_values(s);
3641 release_buffer(avctx);
3643 s->current_picture.coded_picture_number = avctx->frame_number;
3644 s->current_picture.pict_type = pict->pict_type;
3645 s->current_picture.quality = pict->quality;
3646 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3647 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
3648 s->m.current_picture.f.display_picture_number =
3649 s->m.current_picture.f.coded_picture_number = avctx->frame_number;
3650 s->m.current_picture.f.quality = pict->quality;
3651 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3653 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
3655 if(avctx->flags&CODEC_FLAG_PASS1)
3656 ff_write_pass1_stats(&s->m);
3657 s->m.last_pict_type = s->m.pict_type;
3658 avctx->frame_bits = s->m.frame_bits;
3659 avctx->mv_bits = s->m.mv_bits;
3660 avctx->misc_bits = s->m.misc_bits;
3661 avctx->p_tex_bits = s->m.p_tex_bits;
3665 return ff_rac_terminate(c);
3668 static av_cold int encode_end(AVCodecContext *avctx)
3670 SnowContext *s = avctx->priv_data;
3673 if (s->input_picture.data[0])
3674 avctx->release_buffer(avctx, &s->input_picture);
3675 av_free(avctx->stats_out);
3680 #define OFFSET(x) offsetof(SnowContext, x)
3681 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
3682 static const AVOption options[] = {
3683 { "memc_only", "Only do ME/MC (I frames -> ref, P frame -> ME+MC).", OFFSET(memc_only), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
3687 static const AVClass snowenc_class = {
3688 .class_name = "snow encoder",
3689 .item_name = av_default_item_name,
3691 .version = LIBAVUTIL_VERSION_INT,
3694 AVCodec ff_snow_encoder = {
3696 .type = AVMEDIA_TYPE_VIDEO,
3697 .id = CODEC_ID_SNOW,
3698 .priv_data_size = sizeof(SnowContext),
3699 .init = encode_init,
3700 .encode = encode_frame,
3701 .close = encode_end,
3702 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
3703 .priv_class = &snowenc_class,
3713 #include "libavutil/lfg.h"
3714 #include "libavutil/mathematics.h"
3719 int buffer[2][width*height];
3723 s.spatial_decomposition_count=6;
3724 s.spatial_decomposition_type=1;
3726 av_lfg_init(&prng, 1);
3728 printf("testing 5/3 DWT\n");
3729 for(i=0; i<width*height; i++)
3730 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
3732 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3733 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3735 for(i=0; i<width*height; i++)
3736 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
3738 printf("testing 9/7 DWT\n");
3739 s.spatial_decomposition_type=0;
3740 for(i=0; i<width*height; i++)
3741 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
3743 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3744 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3746 for(i=0; i<width*height; i++)
3747 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
3750 int level, orientation, x, y;
3751 int64_t errors[8][4];
3754 memset(errors, 0, sizeof(errors));
3755 s.spatial_decomposition_count=3;
3756 s.spatial_decomposition_type=0;
3757 for(level=0; level<s.spatial_decomposition_count; level++){
3758 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3759 int w= width >> (s.spatial_decomposition_count-level);
3760 int h= height >> (s.spatial_decomposition_count-level);
3761 int stride= width << (s.spatial_decomposition_count-level);
3762 DWTELEM *buf= buffer[0];
3765 if(orientation&1) buf+=w;
3766 if(orientation>1) buf+=stride>>1;
3768 memset(buffer[0], 0, sizeof(int)*width*height);
3769 buf[w/2 + h/2*stride]= 256*256;
3770 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3771 for(y=0; y<height; y++){
3772 for(x=0; x<width; x++){
3773 int64_t d= buffer[0][x + y*width];
3775 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
3777 if(FFABS(height/2-y)<9 && level==2) printf("\n");
3779 error= (int)(sqrt(error)+0.5);
3780 errors[level][orientation]= error;
3781 if(g) g=av_gcd(g, error);
3785 printf("static int const visual_weight[][4]={\n");
3786 for(level=0; level<s.spatial_decomposition_count; level++){
3788 for(orientation=0; orientation<4; orientation++){
3789 printf("%8"PRId64",", errors[level][orientation]/g);
3796 int w= width >> (s.spatial_decomposition_count-level);
3797 //int h= height >> (s.spatial_decomposition_count-level);
3798 int stride= width << (s.spatial_decomposition_count-level);
3799 DWTELEM *buf= buffer[0];
3805 memset(buffer[0], 0, sizeof(int)*width*height);
3806 for(y=0; y<height; y++){
3807 for(x=0; x<width; x++){
3808 int tab[4]={0,2,3,1};
3809 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3812 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3813 for(y=0; y<height; y++){
3814 for(x=0; x<width; x++){
3815 int64_t d= buffer[0][x + y*width];
3817 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
3819 if(FFABS(height/2-y)<9) printf("\n");