2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
29 #include "rangecoder.h"
32 #include "mpegvideo.h"
38 static const int8_t quant3bA[256]={
39 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
40 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
41 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
42 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
43 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
44 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
45 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
46 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
47 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
48 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
49 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
50 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
51 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
52 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
53 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
54 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
57 static const uint8_t obmc32[1024]={
58 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
59 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
60 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
61 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
62 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
63 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
64 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
65 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
66 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
67 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
68 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
69 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
70 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
71 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
72 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
73 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
74 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
75 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
76 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
77 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
78 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
79 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
80 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
81 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
82 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
83 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
84 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
85 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
86 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
87 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
88 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
89 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
92 static const uint8_t obmc16[256]={
93 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
94 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
95 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
96 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
97 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
98 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
99 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
100 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
101 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
102 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
103 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
104 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
105 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
106 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
107 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
108 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
113 static const uint8_t obmc8[64]={
114 4, 12, 20, 28, 28, 20, 12, 4,
115 12, 36, 60, 84, 84, 60, 36, 12,
116 20, 60,100,140,140,100, 60, 20,
117 28, 84,140,196,196,140, 84, 28,
118 28, 84,140,196,196,140, 84, 28,
119 20, 60,100,140,140,100, 60, 20,
120 12, 36, 60, 84, 84, 60, 36, 12,
121 4, 12, 20, 28, 28, 20, 12, 4,
126 static const uint8_t obmc4[16]={
134 static const uint8_t * const obmc_tab[4]={
135 obmc32, obmc16, obmc8, obmc4
138 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
140 typedef struct BlockNode{
146 //#define TYPE_SPLIT 1
147 #define BLOCK_INTRA 1
149 //#define TYPE_NOCOLOR 4
150 uint8_t level; //FIXME merge into type?
153 static const BlockNode null_block= { //FIXME add border maybe
154 .color= {128,128,128},
162 #define LOG2_MB_SIZE 4
163 #define MB_SIZE (1<<LOG2_MB_SIZE)
164 #define ENCODER_EXTRA_BITS 4
167 typedef struct x_and_coeff{
172 typedef struct SubBand{
177 int qlog; ///< log(qscale)/log[2^(1/6)]
182 int stride_line; ///< Stride measured in lines, not pixels.
183 x_and_coeff * x_coeff;
184 struct SubBand *parent;
185 uint8_t state[/*7*2*/ 7 + 512][32];
188 typedef struct Plane{
191 SubBand band[MAX_DECOMPOSITIONS][4];
194 int8_t hcoeff[HTAPS_MAX/2];
199 int8_t last_hcoeff[HTAPS_MAX/2];
203 typedef struct SnowContext{
205 AVCodecContext *avctx;
210 AVFrame input_picture; ///< new_picture with the internal linesizes
211 AVFrame current_picture;
212 AVFrame last_picture[MAX_REF_FRAMES];
213 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
214 AVFrame mconly_picture;
215 // uint8_t q_context[16];
216 uint8_t header_state[32];
217 uint8_t block_state[128 + 32*128];
221 int spatial_decomposition_type;
222 int last_spatial_decomposition_type;
223 int temporal_decomposition_type;
224 int spatial_decomposition_count;
225 int last_spatial_decomposition_count;
226 int temporal_decomposition_count;
229 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
230 uint32_t *ref_scores[MAX_REF_FRAMES];
231 DWTELEM *spatial_dwt_buffer;
232 IDWTELEM *spatial_idwt_buffer;
236 int spatial_scalability;
246 #define QBIAS_SHIFT 3
250 int last_block_max_depth;
251 Plane plane[MAX_PLANES];
253 #define ME_CACHE_SIZE 1024
254 int me_cache[ME_CACHE_SIZE];
255 int me_cache_generation;
259 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
265 // Avoid a name clash on SGI IRIX
268 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
269 static uint8_t qexp[QROOT];
271 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
275 const int a= FFABS(v);
276 const int e= av_log2(a);
277 const int el= FFMIN(e, 10);
278 put_rac(c, state+0, 0);
281 put_rac(c, state+1+i, 1); //1..10
284 put_rac(c, state+1+9, 1); //1..10
286 put_rac(c, state+1+FFMIN(i,9), 0);
288 for(i=e-1; i>=el; i--){
289 put_rac(c, state+22+9, (a>>i)&1); //22..31
292 put_rac(c, state+22+i, (a>>i)&1); //22..31
296 put_rac(c, state+11 + el, v < 0); //11..21
298 put_rac(c, state+0, 1);
302 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
303 if(get_rac(c, state+0))
308 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
313 for(i=e-1; i>=0; i--){
314 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
317 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
322 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
324 int r= log2>=0 ? 1<<log2 : 1;
330 put_rac(c, state+4+log2, 1);
335 put_rac(c, state+4+log2, 0);
337 for(i=log2-1; i>=0; i--){
338 put_rac(c, state+31-i, (v>>i)&1);
342 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
344 int r= log2>=0 ? 1<<log2 : 1;
349 while(get_rac(c, state+4+log2)){
355 for(i=log2-1; i>=0; i--){
356 v+= get_rac(c, state+31-i)<<i;
362 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
363 const int w= b->width;
364 const int h= b->height;
368 x_and_coeff *xc= b->x_coeff;
369 x_and_coeff *prev_xc= NULL;
370 x_and_coeff *prev2_xc= xc;
371 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
372 x_and_coeff *prev_parent_xc= parent_xc;
374 runs= get_symbol2(&s->c, b->state[30], 0);
375 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
382 if(y && prev_xc->x == 0){
394 if(prev_xc->x == x + 1)
400 if(x>>1 > parent_xc->x){
403 if(x>>1 == parent_xc->x){
407 if(/*ll|*/l|lt|t|rt|p){
408 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
410 v=get_rac(&s->c, &b->state[0][context]);
412 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
413 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
420 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
422 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
423 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
432 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
433 else max_run= FFMIN(run, w-x-1);
435 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
441 (xc++)->x= w+1; //end marker
447 while(parent_xc->x != parent->width+1)
450 prev_parent_xc= parent_xc;
452 parent_xc= prev_parent_xc;
457 (xc++)->x= w+1; //end marker
460 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
461 const int w= b->width;
463 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
464 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
465 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
468 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
473 /* If we are on the second or later slice, restore our index. */
475 new_index = save_state[0];
478 for(y=start_y; y<h; y++){
481 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
482 memset(line, 0, b->width*sizeof(IDWTELEM));
483 v = b->x_coeff[new_index].coeff;
484 x = b->x_coeff[new_index++].x;
486 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
487 register int u= -(v&1);
490 v = b->x_coeff[new_index].coeff;
491 x = b->x_coeff[new_index++].x;
495 /* Save our variables for the next slice. */
496 save_state[0] = new_index;
501 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
502 int plane_index, level, orientation;
504 for(plane_index=0; plane_index<3; plane_index++){
505 for(level=0; level<MAX_DECOMPOSITIONS; level++){
506 for(orientation=level ? 1:0; orientation<4; orientation++){
507 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
511 memset(s->header_state, MID_STATE, sizeof(s->header_state));
512 memset(s->block_state, MID_STATE, sizeof(s->block_state));
515 static int alloc_blocks(SnowContext *s){
516 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
517 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
523 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
527 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
528 const int w= s->b_width << s->block_max_depth;
529 const int rem_depth= s->block_max_depth - level;
530 const int index= (x + y*w) << rem_depth;
531 const int block_w= 1<<rem_depth;
544 for(j=0; j<block_w; j++){
545 for(i=0; i<block_w; i++){
546 s->block[index + i + j*w]= block;
551 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
552 const int offset[3]= {
554 ((y*c->uvstride + x)>>1),
555 ((y*c->uvstride + x)>>1),
559 c->src[0][i]= src [i];
560 c->ref[0][i]= ref [i] + offset[i];
565 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
566 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
567 if(s->ref_frames == 1){
568 *mx = mid_pred(left->mx, top->mx, tr->mx);
569 *my = mid_pred(left->my, top->my, tr->my);
571 const int *scale = scale_mv_ref[ref];
572 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
573 (top ->mx * scale[top ->ref] + 128) >>8,
574 (tr ->mx * scale[tr ->ref] + 128) >>8);
575 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
576 (top ->my * scale[top ->ref] + 128) >>8,
577 (tr ->my * scale[tr ->ref] + 128) >>8);
581 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
582 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
583 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
585 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
589 static void decode_q_branch(SnowContext *s, int level, int x, int y){
590 const int w= s->b_width << s->block_max_depth;
591 const int rem_depth= s->block_max_depth - level;
592 const int index= (x + y*w) << rem_depth;
593 int trx= (x+1)<<rem_depth;
594 const BlockNode *left = x ? &s->block[index-1] : &null_block;
595 const BlockNode *top = y ? &s->block[index-w] : &null_block;
596 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
597 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
598 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
601 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
605 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
607 int l = left->color[0];
608 int cb= left->color[1];
609 int cr= left->color[2];
611 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
612 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
613 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
615 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
618 pred_mv(s, &mx, &my, 0, left, top, tr);
619 l += get_symbol(&s->c, &s->block_state[32], 1);
620 cb+= get_symbol(&s->c, &s->block_state[64], 1);
621 cr+= get_symbol(&s->c, &s->block_state[96], 1);
623 if(s->ref_frames > 1)
624 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
625 pred_mv(s, &mx, &my, ref, left, top, tr);
626 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
627 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
629 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
631 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
632 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
633 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
634 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
638 static void decode_blocks(SnowContext *s){
645 decode_q_branch(s, 0, x, y);
650 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
651 static const uint8_t weight[64]={
662 static const uint8_t brane[256]={
663 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
664 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
665 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
666 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
667 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
668 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
669 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
670 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
671 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
672 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
673 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
674 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
675 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
676 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
677 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
678 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
681 static const uint8_t needs[16]={
689 int16_t tmpIt [64*(32+HTAPS_MAX)];
690 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
691 int16_t *tmpI= tmpIt;
692 uint8_t *tmp2= tmp2t[0];
693 const uint8_t *hpel[11];
694 assert(dx<16 && dy<16);
695 r= brane[dx + 16*dy]&15;
696 l= brane[dx + 16*dy]>>4;
698 b= needs[l] | needs[r];
703 for(y=0; y < b_h+HTAPS_MAX-1; y++){
704 for(x=0; x < b_w; x++){
705 int a_1=src[x + HTAPS_MAX/2-4];
706 int a0= src[x + HTAPS_MAX/2-3];
707 int a1= src[x + HTAPS_MAX/2-2];
708 int a2= src[x + HTAPS_MAX/2-1];
709 int a3= src[x + HTAPS_MAX/2+0];
710 int a4= src[x + HTAPS_MAX/2+1];
711 int a5= src[x + HTAPS_MAX/2+2];
712 int a6= src[x + HTAPS_MAX/2+3];
714 if(!p || p->fast_mc){
715 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
719 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
724 if(am&(~255)) am= ~(am>>31);
733 src += HTAPS_MAX/2 - 1;
737 for(y=0; y < b_h; y++){
738 for(x=0; x < b_w+1; x++){
739 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
740 int a0= src[x + (HTAPS_MAX/2-3)*stride];
741 int a1= src[x + (HTAPS_MAX/2-2)*stride];
742 int a2= src[x + (HTAPS_MAX/2-1)*stride];
743 int a3= src[x + (HTAPS_MAX/2+0)*stride];
744 int a4= src[x + (HTAPS_MAX/2+1)*stride];
745 int a5= src[x + (HTAPS_MAX/2+2)*stride];
746 int a6= src[x + (HTAPS_MAX/2+3)*stride];
749 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
751 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
753 if(am&(~255)) am= ~(am>>31);
761 src += stride*(HTAPS_MAX/2 - 1);
765 for(y=0; y < b_h; y++){
766 for(x=0; x < b_w; x++){
767 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
768 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
769 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
770 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
771 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
772 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
773 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
774 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
777 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
779 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
780 if(am&(~255)) am= ~(am>>31);
789 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
794 hpel[ 6]= tmp2t[1] + 1;
796 hpel[ 8]= src + stride;
797 hpel[ 9]= hpel[1] + stride;
798 hpel[10]= hpel[8] + 1;
801 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
802 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
803 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
804 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
807 for(y=0; y < b_h; y++){
808 for(x=0; x < b_w; x++){
809 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
810 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
819 const uint8_t *src1= hpel[l];
820 const uint8_t *src2= hpel[r];
821 int a= weight[((dx&7) + (8*(dy&7)))];
823 for(y=0; y < b_h; y++){
824 for(x=0; x < b_w; x++){
825 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
834 #define mca(dx,dy,b_w)\
835 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
837 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
849 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
850 if(block->type & BLOCK_INTRA){
852 const int color = block->color[plane_index];
853 const int color4= color*0x01010101;
855 for(y=0; y < b_h; y++){
856 *(uint32_t*)&dst[0 + y*stride]= color4;
857 *(uint32_t*)&dst[4 + y*stride]= color4;
858 *(uint32_t*)&dst[8 + y*stride]= color4;
859 *(uint32_t*)&dst[12+ y*stride]= color4;
860 *(uint32_t*)&dst[16+ y*stride]= color4;
861 *(uint32_t*)&dst[20+ y*stride]= color4;
862 *(uint32_t*)&dst[24+ y*stride]= color4;
863 *(uint32_t*)&dst[28+ y*stride]= color4;
866 for(y=0; y < b_h; y++){
867 *(uint32_t*)&dst[0 + y*stride]= color4;
868 *(uint32_t*)&dst[4 + y*stride]= color4;
869 *(uint32_t*)&dst[8 + y*stride]= color4;
870 *(uint32_t*)&dst[12+ y*stride]= color4;
873 for(y=0; y < b_h; y++){
874 *(uint32_t*)&dst[0 + y*stride]= color4;
875 *(uint32_t*)&dst[4 + y*stride]= color4;
878 for(y=0; y < b_h; y++){
879 *(uint32_t*)&dst[0 + y*stride]= color4;
882 for(y=0; y < b_h; y++){
883 for(x=0; x < b_w; x++){
884 dst[x + y*stride]= color;
889 uint8_t *src= s->last_picture[block->ref].data[plane_index];
890 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
891 int mx= block->mx*scale;
892 int my= block->my*scale;
895 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
896 sx += (mx>>4) - (HTAPS_MAX/2-1);
897 sy += (my>>4) - (HTAPS_MAX/2-1);
898 src += sx + sy*stride;
899 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
900 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
901 s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
904 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
905 // assert(!(b_w&(b_w-1)));
906 assert(b_w>1 && b_h>1);
907 assert((tab_index>=0 && tab_index<4) || b_w==32);
908 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
909 mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
912 for(y=0; y<b_h; y+=16){
913 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
914 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
917 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
919 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
920 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
923 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
924 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
929 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
930 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
933 for(y=0; y<b_h; y++){
934 //FIXME ugly misuse of obmc_stride
935 const uint8_t *obmc1= obmc + y*obmc_stride;
936 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
937 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
938 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
939 dst = slice_buffer_get_line(sb, src_y + y);
940 for(x=0; x<b_w; x++){
941 int v= obmc1[x] * block[3][x + y*src_stride]
942 +obmc2[x] * block[2][x + y*src_stride]
943 +obmc3[x] * block[1][x + y*src_stride]
944 +obmc4[x] * block[0][x + y*src_stride];
946 v <<= 8 - LOG2_OBMC_MAX;
952 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
953 if(v&(~255)) v= ~(v>>31);
954 dst8[x + y*src_stride] = v;
962 //FIXME name cleanup (b_w, block_w, b_width stuff)
963 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
964 const int b_width = s->b_width << s->block_max_depth;
965 const int b_height= s->b_height << s->block_max_depth;
966 const int b_stride= b_width;
967 BlockNode *lt= &s->block[b_x + b_y*b_stride];
969 BlockNode *lb= lt+b_stride;
972 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
973 uint8_t *tmp = s->scratchbuf;
980 }else if(b_x + 1 >= b_width){
987 }else if(b_y + 1 >= b_height){
992 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
995 if(!sliced && !offset_dst)
998 }else if(src_x + b_w > w){
1002 obmc -= src_y*obmc_stride;
1004 if(!sliced && !offset_dst)
1005 dst -= src_y*dst_stride;
1007 }else if(src_y + b_h> h){
1011 if(b_w<=0 || b_h<=0) return;
1013 assert(src_stride > 2*MB_SIZE + 5);
1015 if(!sliced && offset_dst)
1016 dst += src_x + src_y*dst_stride;
1017 dst8+= src_x + src_y*src_stride;
1018 // src += src_x + src_y*src_stride;
1020 ptmp= tmp + 3*tmp_step;
1023 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
1025 if(same_block(lt, rt)){
1030 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1033 if(same_block(lt, lb)){
1035 }else if(same_block(rt, lb)){
1040 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1043 if(same_block(lt, rb) ){
1045 }else if(same_block(rt, rb)){
1047 }else if(same_block(lb, rb)){
1051 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
1054 s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
1056 for(y=0; y<b_h; y++){
1057 //FIXME ugly misuse of obmc_stride
1058 const uint8_t *obmc1= obmc + y*obmc_stride;
1059 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1060 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1061 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1062 for(x=0; x<b_w; x++){
1063 int v= obmc1[x] * block[3][x + y*src_stride]
1064 +obmc2[x] * block[2][x + y*src_stride]
1065 +obmc3[x] * block[1][x + y*src_stride]
1066 +obmc4[x] * block[0][x + y*src_stride];
1068 v <<= 8 - LOG2_OBMC_MAX;
1070 v >>= 8 - FRAC_BITS;
1073 v += dst[x + y*dst_stride];
1074 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1075 if(v&(~255)) v= ~(v>>31);
1076 dst8[x + y*src_stride] = v;
1078 dst[x + y*dst_stride] -= v;
1085 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
1086 Plane *p= &s->plane[plane_index];
1087 const int mb_w= s->b_width << s->block_max_depth;
1088 const int mb_h= s->b_height << s->block_max_depth;
1090 int block_size = MB_SIZE >> s->block_max_depth;
1091 int block_w = plane_index ? block_size/2 : block_size;
1092 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1093 int obmc_stride= plane_index ? block_size : 2*block_size;
1094 int ref_stride= s->current_picture.linesize[plane_index];
1095 uint8_t *dst8= s->current_picture.data[plane_index];
1099 if(s->keyframe || (s->avctx->debug&512)){
1104 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1105 // DWTELEM * line = slice_buffer_get_line(sb, y);
1106 IDWTELEM * line = sb->line[y];
1108 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1109 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1111 if(v&(~255)) v= ~(v>>31);
1112 dst8[x + y*ref_stride]= v;
1116 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1117 // DWTELEM * line = slice_buffer_get_line(sb, y);
1118 IDWTELEM * line = sb->line[y];
1120 line[x] -= 128 << FRAC_BITS;
1121 // buf[x + y*w]-= 128<<FRAC_BITS;
1129 for(mb_x=0; mb_x<=mb_w; mb_x++){
1130 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
1131 block_w*mb_x - block_w/2,
1132 block_w*mb_y - block_w/2,
1135 w, ref_stride, obmc_stride,
1137 add, 0, plane_index);
1141 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
1142 Plane *p= &s->plane[plane_index];
1143 const int mb_w= s->b_width << s->block_max_depth;
1144 const int mb_h= s->b_height << s->block_max_depth;
1146 int block_size = MB_SIZE >> s->block_max_depth;
1147 int block_w = plane_index ? block_size/2 : block_size;
1148 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1149 const int obmc_stride= plane_index ? block_size : 2*block_size;
1150 int ref_stride= s->current_picture.linesize[plane_index];
1151 uint8_t *dst8= s->current_picture.data[plane_index];
1155 if(s->keyframe || (s->avctx->debug&512)){
1160 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1162 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1164 if(v&(~255)) v= ~(v>>31);
1165 dst8[x + y*ref_stride]= v;
1169 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1171 buf[x + y*w]-= 128<<FRAC_BITS;
1179 for(mb_x=0; mb_x<=mb_w; mb_x++){
1180 add_yblock(s, 0, NULL, buf, dst8, obmc,
1181 block_w*mb_x - block_w/2,
1182 block_w*mb_y - block_w/2,
1185 w, ref_stride, obmc_stride,
1187 add, 1, plane_index);
1191 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
1192 const int mb_h= s->b_height << s->block_max_depth;
1194 for(mb_y=0; mb_y<=mb_h; mb_y++)
1195 predict_slice(s, buf, plane_index, add, mb_y);
1198 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
1199 const int w= b->width;
1200 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1201 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1202 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1205 if(s->qlog == LOSSLESS_QLOG) return;
1207 for(y=start_y; y<end_y; y++){
1208 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1209 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1213 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1215 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
1221 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
1222 const int w= b->width;
1225 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
1229 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1231 for(y=start_y; y<end_y; y++){
1233 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1234 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1238 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
1239 else line[x] += line[x - 1];
1241 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
1242 else line[x] += line[x - 1];
1245 if(y) line[x] += prev[x];
1251 static void decode_qlogs(SnowContext *s){
1252 int plane_index, level, orientation;
1254 for(plane_index=0; plane_index<3; plane_index++){
1255 for(level=0; level<s->spatial_decomposition_count; level++){
1256 for(orientation=level ? 1:0; orientation<4; orientation++){
1258 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1259 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1260 else q= get_symbol(&s->c, s->header_state, 1);
1261 s->plane[plane_index].band[level][orientation].qlog= q;
1267 #define GET_S(dst, check) \
1268 tmp= get_symbol(&s->c, s->header_state, 0);\
1270 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
1275 static int decode_header(SnowContext *s){
1276 int plane_index, tmp;
1279 memset(kstate, MID_STATE, sizeof(kstate));
1281 s->keyframe= get_rac(&s->c, kstate);
1282 if(s->keyframe || s->always_reset){
1284 s->spatial_decomposition_type=
1288 s->block_max_depth= 0;
1291 GET_S(s->version, tmp <= 0U)
1292 s->always_reset= get_rac(&s->c, s->header_state);
1293 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1294 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1295 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1296 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1297 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1298 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
1299 s->spatial_scalability= get_rac(&s->c, s->header_state);
1300 // s->rate_scalability= get_rac(&s->c, s->header_state);
1301 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
1302 s->max_ref_frames++;
1308 if(get_rac(&s->c, s->header_state)){
1309 for(plane_index=0; plane_index<2; plane_index++){
1310 int htaps, i, sum=0;
1311 Plane *p= &s->plane[plane_index];
1312 p->diag_mc= get_rac(&s->c, s->header_state);
1313 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
1314 if((unsigned)htaps > HTAPS_MAX || htaps==0)
1317 for(i= htaps/2; i; i--){
1318 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
1319 sum += p->hcoeff[i];
1321 p->hcoeff[0]= 32-sum;
1323 s->plane[2].diag_mc= s->plane[1].diag_mc;
1324 s->plane[2].htaps = s->plane[1].htaps;
1325 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
1327 if(get_rac(&s->c, s->header_state)){
1328 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1333 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
1334 if(s->spatial_decomposition_type > 1U){
1335 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
1338 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
1339 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
1340 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
1344 s->qlog += get_symbol(&s->c, s->header_state, 1);
1345 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
1346 s->qbias += get_symbol(&s->c, s->header_state, 1);
1347 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
1348 if(s->block_max_depth > 1 || s->block_max_depth < 0){
1349 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
1350 s->block_max_depth= 0;
1357 static void init_qexp(void){
1361 for(i=0; i<QROOT; i++){
1363 v *= pow(2, 1.0 / QROOT);
1367 static av_cold int common_init(AVCodecContext *avctx){
1368 SnowContext *s = avctx->priv_data;
1373 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
1375 dsputil_init(&s->dsp, avctx);
1376 ff_dwt_init(&s->dwt);
1379 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
1380 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
1381 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
1382 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
1383 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
1384 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
1403 #define mcfh(dx,dy)\
1404 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
1405 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
1406 mc_block_hpel ## dx ## dy ## 16;\
1407 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
1408 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
1409 mc_block_hpel ## dx ## dy ## 8;
1419 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
1421 width= s->avctx->width;
1422 height= s->avctx->height;
1424 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
1425 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
1427 for(i=0; i<MAX_REF_FRAMES; i++)
1428 for(j=0; j<MAX_REF_FRAMES; j++)
1429 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
1431 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
1432 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
1437 static int common_init_after_header(AVCodecContext *avctx){
1438 SnowContext *s = avctx->priv_data;
1439 int plane_index, level, orientation;
1441 for(plane_index=0; plane_index<3; plane_index++){
1442 int w= s->avctx->width;
1443 int h= s->avctx->height;
1446 w>>= s->chroma_h_shift;
1447 h>>= s->chroma_v_shift;
1449 s->plane[plane_index].width = w;
1450 s->plane[plane_index].height= h;
1452 for(level=s->spatial_decomposition_count-1; level>=0; level--){
1453 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1454 SubBand *b= &s->plane[plane_index].band[level][orientation];
1456 b->buf= s->spatial_dwt_buffer;
1458 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
1459 b->width = (w + !(orientation&1))>>1;
1460 b->height= (h + !(orientation>1))>>1;
1462 b->stride_line = 1 << (s->spatial_decomposition_count - level);
1463 b->buf_x_offset = 0;
1464 b->buf_y_offset = 0;
1468 b->buf_x_offset = (w+1)>>1;
1471 b->buf += b->stride>>1;
1472 b->buf_y_offset = b->stride_line >> 1;
1474 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
1477 b->parent= &s->plane[plane_index].band[level-1][orientation];
1478 //FIXME avoid this realloc
1479 av_freep(&b->x_coeff);
1480 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
1495 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
1496 SubBand *b= &p->band[level][orientation];
1500 int step= 1 << (s->spatial_decomposition_count - level);
1507 //FIXME bias for nonzero ?
1509 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
1510 for(y=0; y<p->height; y++){
1511 for(x=0; x<p->width; x++){
1512 int sx= (x-xo + step/2) / step / Q2_STEP;
1513 int sy= (y-yo + step/2) / step / Q2_STEP;
1514 int v= r0[x + y*p->width] - r1[x + y*p->width];
1515 assert(sx>=0 && sy>=0 && sx < score_stride);
1517 score[sx + sy*score_stride] += v*v;
1518 assert(score[sx + sy*score_stride] >= 0);
1523 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
1524 int level, orientation;
1526 for(level=0; level<s->spatial_decomposition_count; level++){
1527 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1528 SubBand *b= &p->band[level][orientation];
1529 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
1531 dequantize(s, b, dst, b->stride);
1536 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
1537 int level, orientation, ys, xs, x, y, pass;
1538 IDWTELEM best_dequant[height * stride];
1539 IDWTELEM idwt2_buffer[height * stride];
1540 const int score_stride= (width + 10)/Q2_STEP;
1541 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1542 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1543 int threshold= (s->m.lambda * s->m.lambda) >> 6;
1545 //FIXME pass the copy cleanly ?
1547 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
1548 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
1550 for(level=0; level<s->spatial_decomposition_count; level++){
1551 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1552 SubBand *b= &p->band[level][orientation];
1553 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1554 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
1555 assert(src == b->buf); // code does not depend on this but it is true currently
1557 quantize(s, b, dst, src, b->stride, s->qbias);
1560 for(pass=0; pass<1; pass++){
1561 if(s->qbias == 0) //keyframe
1563 for(level=0; level<s->spatial_decomposition_count; level++){
1564 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1565 SubBand *b= &p->band[level][orientation];
1566 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
1567 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1569 for(ys= 0; ys<Q2_STEP; ys++){
1570 for(xs= 0; xs<Q2_STEP; xs++){
1571 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1572 dequantize_all(s, p, idwt2_buffer, width, height);
1573 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1574 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1575 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1576 for(y=ys; y<b->height; y+= Q2_STEP){
1577 for(x=xs; x<b->width; x+= Q2_STEP){
1578 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
1579 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
1580 //FIXME try more than just --
1583 dequantize_all(s, p, idwt2_buffer, width, height);
1584 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1585 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1586 for(y=ys; y<b->height; y+= Q2_STEP){
1587 for(x=xs; x<b->width; x+= Q2_STEP){
1588 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
1589 if(score[score_idx] <= best_score[score_idx] + threshold){
1590 best_score[score_idx]= score[score_idx];
1591 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
1592 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
1593 //FIXME copy instead
1602 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
1605 #endif /* QUANTIZE2==1 */
1607 #define USE_HALFPEL_PLANE 0
1609 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
1612 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
1616 int w= s->avctx->width >>is_chroma;
1617 int h= s->avctx->height >>is_chroma;
1618 int ls= frame->linesize[p];
1619 uint8_t *src= frame->data[p];
1621 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1622 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1623 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1630 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
1637 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1645 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1653 static void release_buffer(AVCodecContext *avctx){
1654 SnowContext *s = avctx->priv_data;
1657 if(s->last_picture[s->max_ref_frames-1].data[0]){
1658 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
1660 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
1661 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
1665 static int frame_start(SnowContext *s){
1667 int w= s->avctx->width; //FIXME round up to x16 ?
1668 int h= s->avctx->height;
1670 if(s->current_picture.data[0]){
1671 s->dsp.draw_edges(s->current_picture.data[0],
1672 s->current_picture.linesize[0], w , h ,
1673 EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM);
1674 s->dsp.draw_edges(s->current_picture.data[1],
1675 s->current_picture.linesize[1], w>>1, h>>1,
1676 EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
1677 s->dsp.draw_edges(s->current_picture.data[2],
1678 s->current_picture.linesize[2], w>>1, h>>1,
1679 EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
1682 release_buffer(s->avctx);
1684 tmp= s->last_picture[s->max_ref_frames-1];
1685 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
1686 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
1687 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
1688 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
1689 s->last_picture[0]= s->current_picture;
1690 s->current_picture= tmp;
1696 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
1697 if(i && s->last_picture[i-1].key_frame)
1700 if(s->ref_frames==0){
1701 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
1706 s->current_picture.reference= 1;
1707 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
1708 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
1712 s->current_picture.key_frame= s->keyframe;
1717 static av_cold void common_end(SnowContext *s){
1718 int plane_index, level, orientation, i;
1720 av_freep(&s->spatial_dwt_buffer);
1721 av_freep(&s->spatial_idwt_buffer);
1724 av_freep(&s->m.me.scratchpad);
1725 av_freep(&s->m.me.map);
1726 av_freep(&s->m.me.score_map);
1727 av_freep(&s->m.obmc_scratchpad);
1729 av_freep(&s->block);
1730 av_freep(&s->scratchbuf);
1732 for(i=0; i<MAX_REF_FRAMES; i++){
1733 av_freep(&s->ref_mvs[i]);
1734 av_freep(&s->ref_scores[i]);
1735 if(s->last_picture[i].data[0])
1736 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
1739 for(plane_index=0; plane_index<3; plane_index++){
1740 for(level=s->spatial_decomposition_count-1; level>=0; level--){
1741 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1742 SubBand *b= &s->plane[plane_index].band[level][orientation];
1744 av_freep(&b->x_coeff);
1748 if (s->mconly_picture.data[0])
1749 s->avctx->release_buffer(s->avctx, &s->mconly_picture);
1750 if (s->current_picture.data[0])
1751 s->avctx->release_buffer(s->avctx, &s->current_picture);
1754 static av_cold int decode_init(AVCodecContext *avctx)
1756 avctx->pix_fmt= PIX_FMT_YUV420P;
1763 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
1764 const uint8_t *buf = avpkt->data;
1765 int buf_size = avpkt->size;
1766 SnowContext *s = avctx->priv_data;
1767 RangeCoder * const c= &s->c;
1769 AVFrame *picture = data;
1770 int level, orientation, plane_index;
1772 ff_init_range_decoder(c, buf, buf_size);
1773 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
1775 s->current_picture.pict_type= AV_PICTURE_TYPE_I; //FIXME I vs. P
1776 if(decode_header(s)<0)
1778 common_init_after_header(avctx);
1780 // realloc slice buffer for the case that spatial_decomposition_count changed
1781 ff_slice_buffer_destroy(&s->sb);
1782 ff_slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
1784 for(plane_index=0; plane_index<3; plane_index++){
1785 Plane *p= &s->plane[plane_index];
1786 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
1787 && p->hcoeff[1]==-10
1793 if(frame_start(s) < 0)
1795 //keyframe flag duplication mess FIXME
1796 if(avctx->debug&FF_DEBUG_PICT_INFO)
1797 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
1801 for(plane_index=0; plane_index<3; plane_index++){
1802 Plane *p= &s->plane[plane_index];
1806 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
1808 if(s->avctx->debug&2048){
1809 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
1810 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
1814 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
1815 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
1821 for(level=0; level<s->spatial_decomposition_count; level++){
1822 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1823 SubBand *b= &p->band[level][orientation];
1824 unpack_coeffs(s, b, b->parent, orientation);
1830 const int mb_h= s->b_height << s->block_max_depth;
1831 const int block_size = MB_SIZE >> s->block_max_depth;
1832 const int block_w = plane_index ? block_size/2 : block_size;
1834 DWTCompose cs[MAX_DECOMPOSITIONS];
1839 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
1840 for(mb_y=0; mb_y<=mb_h; mb_y++){
1842 int slice_starty = block_w*mb_y;
1843 int slice_h = block_w*(mb_y+1);
1844 if (!(s->keyframe || s->avctx->debug&512)){
1845 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
1846 slice_h -= (block_w >> 1);
1849 for(level=0; level<s->spatial_decomposition_count; level++){
1850 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1851 SubBand *b= &p->band[level][orientation];
1854 int our_mb_start = mb_y;
1855 int our_mb_end = (mb_y + 1);
1857 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
1858 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
1859 if (!(s->keyframe || s->avctx->debug&512)){
1860 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
1861 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
1863 start_y = FFMIN(b->height, start_y);
1864 end_y = FFMIN(b->height, end_y);
1866 if (start_y != end_y){
1867 if (orientation == 0){
1868 SubBand * correlate_band = &p->band[0][0];
1869 int correlate_end_y = FFMIN(b->height, end_y + 1);
1870 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
1871 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
1872 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
1873 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
1876 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
1881 for(; yd<slice_h; yd+=4){
1882 ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
1885 if(s->qlog == LOSSLESS_QLOG){
1886 for(; yq<slice_h && yq<h; yq++){
1887 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
1889 line[x] <<= FRAC_BITS;
1894 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
1896 y = FFMIN(p->height, slice_starty);
1897 end_y = FFMIN(p->height, slice_h);
1899 ff_slice_buffer_release(&s->sb, y++);
1902 ff_slice_buffer_flush(&s->sb);
1909 release_buffer(avctx);
1911 if(!(s->avctx->debug&2048))
1912 *picture= s->current_picture;
1914 *picture= s->mconly_picture;
1916 *data_size = sizeof(AVFrame);
1918 bytes_read= c->bytestream - c->bytestream_start;
1919 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
1924 static av_cold int decode_end(AVCodecContext *avctx)
1926 SnowContext *s = avctx->priv_data;
1928 ff_slice_buffer_destroy(&s->sb);
1935 AVCodec ff_snow_decoder = {
1937 .type = AVMEDIA_TYPE_VIDEO,
1938 .id = CODEC_ID_SNOW,
1939 .priv_data_size = sizeof(SnowContext),
1940 .init = decode_init,
1941 .close = decode_end,
1942 .decode = decode_frame,
1943 .capabilities = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
1944 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
1947 #if CONFIG_SNOW_ENCODER
1948 static av_cold int encode_init(AVCodecContext *avctx)
1950 SnowContext *s = avctx->priv_data;
1953 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
1954 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
1955 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
1959 if(avctx->prediction_method == DWT_97
1960 && (avctx->flags & CODEC_FLAG_QSCALE)
1961 && avctx->global_quality == 0){
1962 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
1966 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
1968 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
1969 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
1971 for(plane_index=0; plane_index<3; plane_index++){
1972 s->plane[plane_index].diag_mc= 1;
1973 s->plane[plane_index].htaps= 6;
1974 s->plane[plane_index].hcoeff[0]= 40;
1975 s->plane[plane_index].hcoeff[1]= -10;
1976 s->plane[plane_index].hcoeff[2]= 2;
1977 s->plane[plane_index].fast_mc= 1;
1986 s->m.flags = avctx->flags;
1987 s->m.bit_rate= avctx->bit_rate;
1990 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
1991 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1992 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1993 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
1994 h263_encode_init(&s->m); //mv_penalty
1996 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
1998 if(avctx->flags&CODEC_FLAG_PASS1){
1999 if(!avctx->stats_out)
2000 avctx->stats_out = av_mallocz(256);
2002 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2003 if(ff_rate_control_init(&s->m) < 0)
2006 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2008 avctx->coded_frame= &s->current_picture;
2009 switch(avctx->pix_fmt){
2010 // case PIX_FMT_YUV444P:
2011 // case PIX_FMT_YUV422P:
2012 case PIX_FMT_YUV420P:
2014 // case PIX_FMT_YUV411P:
2015 // case PIX_FMT_YUV410P:
2016 s->colorspace_type= 0;
2018 /* case PIX_FMT_RGB32:
2022 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
2025 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2026 s->chroma_h_shift= 1;
2027 s->chroma_v_shift= 1;
2029 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
2030 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
2032 s->avctx->get_buffer(s->avctx, &s->input_picture);
2034 if(s->avctx->me_method == ME_ITER){
2036 int size= s->b_width * s->b_height << 2*s->block_max_depth;
2037 for(i=0; i<s->max_ref_frames; i++){
2038 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
2039 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
2046 //near copy & paste from dsputil, FIXME
2047 static int pix_sum(uint8_t * pix, int line_size, int w)
2052 for (i = 0; i < w; i++) {
2053 for (j = 0; j < w; j++) {
2057 pix += line_size - w;
2062 //near copy & paste from dsputil, FIXME
2063 static int pix_norm1(uint8_t * pix, int line_size, int w)
2066 uint32_t *sq = ff_squareTbl + 256;
2069 for (i = 0; i < w; i++) {
2070 for (j = 0; j < w; j ++) {
2074 pix += line_size - w;
2082 #define P_TOPRIGHT P[3]
2083 #define P_MEDIAN P[4]
2085 #define FLAG_QPEL 1 //must be 1
2087 static int encode_q_branch(SnowContext *s, int level, int x, int y){
2088 uint8_t p_buffer[1024];
2089 uint8_t i_buffer[1024];
2090 uint8_t p_state[sizeof(s->block_state)];
2091 uint8_t i_state[sizeof(s->block_state)];
2093 uint8_t *pbbak= s->c.bytestream;
2094 uint8_t *pbbak_start= s->c.bytestream_start;
2095 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
2096 const int w= s->b_width << s->block_max_depth;
2097 const int h= s->b_height << s->block_max_depth;
2098 const int rem_depth= s->block_max_depth - level;
2099 const int index= (x + y*w) << rem_depth;
2100 const int block_w= 1<<(LOG2_MB_SIZE - level);
2101 int trx= (x+1)<<rem_depth;
2102 int try= (y+1)<<rem_depth;
2103 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2104 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2105 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2106 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2107 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2108 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2109 int pl = left->color[0];
2110 int pcb= left->color[1];
2111 int pcr= left->color[2];
2115 const int stride= s->current_picture.linesize[0];
2116 const int uvstride= s->current_picture.linesize[1];
2117 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2118 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2119 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2121 int16_t last_mv[3][2];
2122 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2123 const int shift= 1+qpel;
2124 MotionEstContext *c= &s->m.me;
2125 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2126 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2127 int my_context= av_log2(2*FFABS(left->my - top->my));
2128 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2129 int ref, best_ref, ref_score, ref_mx, ref_my;
2131 assert(sizeof(s->block_state) >= 256);
2133 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2137 // clip predictors / edge ?
2139 P_LEFT[0]= left->mx;
2140 P_LEFT[1]= left->my;
2143 P_TOPRIGHT[0]= tr->mx;
2144 P_TOPRIGHT[1]= tr->my;
2146 last_mv[0][0]= s->block[index].mx;
2147 last_mv[0][1]= s->block[index].my;
2148 last_mv[1][0]= right->mx;
2149 last_mv[1][1]= right->my;
2150 last_mv[2][0]= bottom->mx;
2151 last_mv[2][1]= bottom->my;
2158 assert(c-> stride == stride);
2159 assert(c->uvstride == uvstride);
2161 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2162 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2163 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2164 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2166 c->xmin = - x*block_w - 16+3;
2167 c->ymin = - y*block_w - 16+3;
2168 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2169 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2171 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2172 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2173 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2174 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2175 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2176 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2177 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2179 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2180 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2183 c->pred_x= P_LEFT[0];
2184 c->pred_y= P_LEFT[1];
2186 c->pred_x = P_MEDIAN[0];
2187 c->pred_y = P_MEDIAN[1];
2192 for(ref=0; ref<s->ref_frames; ref++){
2193 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2195 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2196 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2198 assert(ref_mx >= c->xmin);
2199 assert(ref_mx <= c->xmax);
2200 assert(ref_my >= c->ymin);
2201 assert(ref_my <= c->ymax);
2203 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2204 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2205 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2206 if(s->ref_mvs[ref]){
2207 s->ref_mvs[ref][index][0]= ref_mx;
2208 s->ref_mvs[ref][index][1]= ref_my;
2209 s->ref_scores[ref][index]= ref_score;
2211 if(score > ref_score){
2218 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
2221 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
2223 pc.bytestream_start=
2224 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2225 memcpy(p_state, s->block_state, sizeof(s->block_state));
2227 if(level!=s->block_max_depth)
2228 put_rac(&pc, &p_state[4 + s_context], 1);
2229 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2230 if(s->ref_frames > 1)
2231 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2232 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2233 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2234 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2235 p_len= pc.bytestream - pc.bytestream_start;
2236 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
2238 block_s= block_w*block_w;
2239 sum = pix_sum(current_data[0], stride, block_w);
2240 l= (sum + block_s/2)/block_s;
2241 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2243 block_s= block_w*block_w>>2;
2244 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2245 cb= (sum + block_s/2)/block_s;
2246 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2247 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2248 cr= (sum + block_s/2)/block_s;
2249 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2252 ic.bytestream_start=
2253 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2254 memcpy(i_state, s->block_state, sizeof(s->block_state));
2255 if(level!=s->block_max_depth)
2256 put_rac(&ic, &i_state[4 + s_context], 1);
2257 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2258 put_symbol(&ic, &i_state[32], l-pl , 1);
2259 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2260 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2261 i_len= ic.bytestream - ic.bytestream_start;
2262 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
2264 // assert(score==256*256*256*64-1);
2265 assert(iscore < 255*255*256 + s->lambda2*10);
2266 assert(iscore >= 0);
2267 assert(l>=0 && l<=255);
2268 assert(pl>=0 && pl<=255);
2271 int varc= iscore >> 8;
2272 int vard= score >> 8;
2273 if (vard <= 64 || vard < varc)
2274 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2276 c->scene_change_score+= s->m.qscale;
2279 if(level!=s->block_max_depth){
2280 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2281 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2282 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2283 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2284 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2285 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2287 if(score2 < score && score2 < iscore)
2292 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2293 memcpy(pbbak, i_buffer, i_len);
2295 s->c.bytestream_start= pbbak_start;
2296 s->c.bytestream= pbbak + i_len;
2297 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2298 memcpy(s->block_state, i_state, sizeof(s->block_state));
2301 memcpy(pbbak, p_buffer, p_len);
2303 s->c.bytestream_start= pbbak_start;
2304 s->c.bytestream= pbbak + p_len;
2305 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2306 memcpy(s->block_state, p_state, sizeof(s->block_state));
2311 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2312 const int w= s->b_width << s->block_max_depth;
2313 const int rem_depth= s->block_max_depth - level;
2314 const int index= (x + y*w) << rem_depth;
2315 int trx= (x+1)<<rem_depth;
2316 BlockNode *b= &s->block[index];
2317 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2318 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2319 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2320 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2321 int pl = left->color[0];
2322 int pcb= left->color[1];
2323 int pcr= left->color[2];
2325 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2326 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2327 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2328 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2331 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2335 if(level!=s->block_max_depth){
2336 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2337 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2339 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2340 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2341 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2342 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2343 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2347 if(b->type & BLOCK_INTRA){
2348 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2349 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2350 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2351 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2352 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2353 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2355 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2356 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2357 if(s->ref_frames > 1)
2358 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2359 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2360 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2361 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2365 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2367 Plane *p= &s->plane[plane_index];
2368 const int block_size = MB_SIZE >> s->block_max_depth;
2369 const int block_w = plane_index ? block_size/2 : block_size;
2370 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2371 const int obmc_stride= plane_index ? block_size : 2*block_size;
2372 const int ref_stride= s->current_picture.linesize[plane_index];
2373 uint8_t *src= s-> input_picture.data[plane_index];
2374 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2375 const int b_stride = s->b_width << s->block_max_depth;
2376 const int w= p->width;
2377 const int h= p->height;
2378 int index= mb_x + mb_y*b_stride;
2379 BlockNode *b= &s->block[index];
2380 BlockNode backup= *b;
2384 b->type|= BLOCK_INTRA;
2385 b->color[plane_index]= 0;
2386 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2389 int mb_x2= mb_x + (i &1) - 1;
2390 int mb_y2= mb_y + (i>>1) - 1;
2391 int x= block_w*mb_x2 + block_w/2;
2392 int y= block_w*mb_y2 + block_w/2;
2394 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2395 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2397 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2398 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2399 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2400 int obmc_v= obmc[index];
2402 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2403 if(x<0) obmc_v += obmc[index + block_w];
2404 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2405 if(x+block_w>w) obmc_v += obmc[index - block_w];
2406 //FIXME precalculate this or simplify it somehow else
2408 d = -dst[index] + (1<<(FRAC_BITS-1));
2410 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2411 aa += obmc_v * obmc_v; //FIXME precalculate this
2417 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2420 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2421 const int b_stride = s->b_width << s->block_max_depth;
2422 const int b_height = s->b_height<< s->block_max_depth;
2423 int index= x + y*b_stride;
2424 const BlockNode *b = &s->block[index];
2425 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2426 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2427 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2428 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2430 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2431 // int my_context= av_log2(2*FFABS(left->my - top->my));
2433 if(x<0 || x>=b_stride || y>=b_height)
2440 00001XXXX 15-30 8-15
2442 //FIXME try accurate rate
2443 //FIXME intra and inter predictors if surrounding blocks are not the same type
2444 if(b->type & BLOCK_INTRA){
2445 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2446 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2447 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2449 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2452 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2453 + av_log2(2*FFABS(dmy))
2454 + av_log2(2*b->ref));
2458 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2459 Plane *p= &s->plane[plane_index];
2460 const int block_size = MB_SIZE >> s->block_max_depth;
2461 const int block_w = plane_index ? block_size/2 : block_size;
2462 const int obmc_stride= plane_index ? block_size : 2*block_size;
2463 const int ref_stride= s->current_picture.linesize[plane_index];
2464 uint8_t *dst= s->current_picture.data[plane_index];
2465 uint8_t *src= s-> input_picture.data[plane_index];
2466 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2467 uint8_t *cur = s->scratchbuf;
2468 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2469 const int b_stride = s->b_width << s->block_max_depth;
2470 const int b_height = s->b_height<< s->block_max_depth;
2471 const int w= p->width;
2472 const int h= p->height;
2475 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2476 int sx= block_w*mb_x - block_w/2;
2477 int sy= block_w*mb_y - block_w/2;
2478 int x0= FFMAX(0,-sx);
2479 int y0= FFMAX(0,-sy);
2480 int x1= FFMIN(block_w*2, w-sx);
2481 int y1= FFMIN(block_w*2, h-sy);
2484 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2486 for(y=y0; y<y1; y++){
2487 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2488 const IDWTELEM *pred1 = pred + y*obmc_stride;
2489 uint8_t *cur1 = cur + y*ref_stride;
2490 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2491 for(x=x0; x<x1; x++){
2492 #if FRAC_BITS >= LOG2_OBMC_MAX
2493 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2495 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2497 v = (v + pred1[x]) >> FRAC_BITS;
2498 if(v&(~255)) v= ~(v>>31);
2503 /* copy the regions where obmc[] = (uint8_t)256 */
2504 if(LOG2_OBMC_MAX == 8
2505 && (mb_x == 0 || mb_x == b_stride-1)
2506 && (mb_y == 0 || mb_y == b_height-1)){
2515 for(y=y0; y<y1; y++)
2516 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2520 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2521 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2522 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2523 * So improving the score of one block is not strictly guaranteed
2524 * to improve the score of the whole frame, thus iterative motion
2525 * estimation does not always converge. */
2526 if(s->avctx->me_cmp == FF_CMP_W97)
2527 distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2528 else if(s->avctx->me_cmp == FF_CMP_W53)
2529 distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2533 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2534 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2539 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2548 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2550 if(mb_x == b_stride-2)
2551 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2553 return distortion + rate*penalty_factor;
2556 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2558 Plane *p= &s->plane[plane_index];
2559 const int block_size = MB_SIZE >> s->block_max_depth;
2560 const int block_w = plane_index ? block_size/2 : block_size;
2561 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2562 const int obmc_stride= plane_index ? block_size : 2*block_size;
2563 const int ref_stride= s->current_picture.linesize[plane_index];
2564 uint8_t *dst= s->current_picture.data[plane_index];
2565 uint8_t *src= s-> input_picture.data[plane_index];
2566 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2567 // const has only been removed from zero_dst to suppress a warning
2568 static IDWTELEM zero_dst[4096]; //FIXME
2569 const int b_stride = s->b_width << s->block_max_depth;
2570 const int w= p->width;
2571 const int h= p->height;
2574 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2577 int mb_x2= mb_x + (i%3) - 1;
2578 int mb_y2= mb_y + (i/3) - 1;
2579 int x= block_w*mb_x2 + block_w/2;
2580 int y= block_w*mb_y2 + block_w/2;
2582 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2583 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2585 //FIXME find a cleaner/simpler way to skip the outside stuff
2586 for(y2= y; y2<0; y2++)
2587 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2588 for(y2= h; y2<y+block_w; y2++)
2589 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2591 for(y2= y; y2<y+block_w; y2++)
2592 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2595 for(y2= y; y2<y+block_w; y2++)
2596 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2599 assert(block_w== 8 || block_w==16);
2600 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2604 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2605 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2613 rate = get_block_bits(s, mb_x, mb_y, 2);
2614 for(i=merged?4:0; i<9; i++){
2615 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2616 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2619 return distortion + rate*penalty_factor;
2622 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2623 const int w= b->width;
2624 const int h= b->height;
2636 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2637 v= src[x + y*stride];
2640 t= src[x + (y-1)*stride];
2642 lt= src[x - 1 + (y-1)*stride];
2645 rt= src[x + 1 + (y-1)*stride];
2649 l= src[x - 1 + y*stride];
2651 if(orientation==1) ll= src[y + (x-2)*stride];
2652 else ll= src[x - 2 + y*stride];
2658 if(px<b->parent->width && py<b->parent->height)
2659 p= parent[px + py*2*stride];
2661 if(!(/*ll|*/l|lt|t|rt|p)){
2663 runs[run_index++]= run;
2671 max_index= run_index;
2672 runs[run_index++]= run;
2674 run= runs[run_index++];
2676 put_symbol2(&s->c, b->state[30], max_index, 0);
2677 if(run_index <= max_index)
2678 put_symbol2(&s->c, b->state[1], run, 3);
2681 if(s->c.bytestream_end - s->c.bytestream < w*40){
2682 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2687 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2688 v= src[x + y*stride];
2691 t= src[x + (y-1)*stride];
2693 lt= src[x - 1 + (y-1)*stride];
2696 rt= src[x + 1 + (y-1)*stride];
2700 l= src[x - 1 + y*stride];
2702 if(orientation==1) ll= src[y + (x-2)*stride];
2703 else ll= src[x - 2 + y*stride];
2709 if(px<b->parent->width && py<b->parent->height)
2710 p= parent[px + py*2*stride];
2712 if(/*ll|*/l|lt|t|rt|p){
2713 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
2715 put_rac(&s->c, &b->state[0][context], !!v);
2718 run= runs[run_index++];
2720 if(run_index <= max_index)
2721 put_symbol2(&s->c, b->state[1], run, 3);
2729 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
2730 int l2= 2*FFABS(l) + (l<0);
2731 int t2= 2*FFABS(t) + (t<0);
2733 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
2734 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
2742 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2743 // encode_subband_qtree(s, b, src, parent, stride, orientation);
2744 // encode_subband_z0run(s, b, src, parent, stride, orientation);
2745 return encode_subband_c0run(s, b, src, parent, stride, orientation);
2746 // encode_subband_dzr(s, b, src, parent, stride, orientation);
2749 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2750 const int b_stride= s->b_width << s->block_max_depth;
2751 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2752 BlockNode backup= *block;
2753 int rd, index, value;
2755 assert(mb_x>=0 && mb_y>=0);
2756 assert(mb_x<b_stride);
2759 block->color[0] = p[0];
2760 block->color[1] = p[1];
2761 block->color[2] = p[2];
2762 block->type |= BLOCK_INTRA;
2764 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2765 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2766 if(s->me_cache[index] == value)
2768 s->me_cache[index]= value;
2772 block->type &= ~BLOCK_INTRA;
2775 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2787 /* special case for int[2] args we discard afterwards,
2788 * fixes compilation problem with gcc 2.95 */
2789 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2790 int p[2] = {p0, p1};
2791 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2794 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2795 const int b_stride= s->b_width << s->block_max_depth;
2796 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2797 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2798 int rd, index, value;
2800 assert(mb_x>=0 && mb_y>=0);
2801 assert(mb_x<b_stride);
2802 assert(((mb_x|mb_y)&1) == 0);
2804 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
2805 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
2806 if(s->me_cache[index] == value)
2808 s->me_cache[index]= value;
2813 block->type &= ~BLOCK_INTRA;
2814 block[1]= block[b_stride]= block[b_stride+1]= *block;
2816 rd= get_4block_rd(s, mb_x, mb_y, 0);
2823 block[0]= backup[0];
2824 block[1]= backup[1];
2825 block[b_stride]= backup[2];
2826 block[b_stride+1]= backup[3];
2831 static void iterative_me(SnowContext *s){
2832 int pass, mb_x, mb_y;
2833 const int b_width = s->b_width << s->block_max_depth;
2834 const int b_height= s->b_height << s->block_max_depth;
2835 const int b_stride= b_width;
2839 RangeCoder r = s->c;
2840 uint8_t state[sizeof(s->block_state)];
2841 memcpy(state, s->block_state, sizeof(s->block_state));
2842 for(mb_y= 0; mb_y<s->b_height; mb_y++)
2843 for(mb_x= 0; mb_x<s->b_width; mb_x++)
2844 encode_q_branch(s, 0, mb_x, mb_y);
2846 memcpy(s->block_state, state, sizeof(s->block_state));
2849 for(pass=0; pass<25; pass++){
2852 for(mb_y= 0; mb_y<b_height; mb_y++){
2853 for(mb_x= 0; mb_x<b_width; mb_x++){
2854 int dia_change, i, j, ref;
2855 int best_rd= INT_MAX, ref_rd;
2856 BlockNode backup, ref_b;
2857 const int index= mb_x + mb_y * b_stride;
2858 BlockNode *block= &s->block[index];
2859 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
2860 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
2861 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
2862 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
2863 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
2864 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
2865 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
2866 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
2867 const int b_w= (MB_SIZE >> s->block_max_depth);
2868 uint8_t obmc_edged[b_w*2][b_w*2];
2870 if(pass && (block->type & BLOCK_OPT))
2872 block->type |= BLOCK_OPT;
2876 if(!s->me_cache_generation)
2877 memset(s->me_cache, 0, sizeof(s->me_cache));
2878 s->me_cache_generation += 1<<22;
2880 //FIXME precalculate
2883 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
2885 for(y=0; y<b_w*2; y++)
2886 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
2887 if(mb_x==b_stride-1)
2888 for(y=0; y<b_w*2; y++)
2889 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
2891 for(x=0; x<b_w*2; x++)
2892 obmc_edged[0][x] += obmc_edged[b_w-1][x];
2893 for(y=1; y<b_w; y++)
2894 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
2896 if(mb_y==b_height-1){
2897 for(x=0; x<b_w*2; x++)
2898 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
2899 for(y=b_w; y<b_w*2-1; y++)
2900 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
2904 //skip stuff outside the picture
2905 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
2906 uint8_t *src= s-> input_picture.data[0];
2907 uint8_t *dst= s->current_picture.data[0];
2908 const int stride= s->current_picture.linesize[0];
2909 const int block_w= MB_SIZE >> s->block_max_depth;
2910 const int sx= block_w*mb_x - block_w/2;
2911 const int sy= block_w*mb_y - block_w/2;
2912 const int w= s->plane[0].width;
2913 const int h= s->plane[0].height;
2917 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
2918 for(y=h; y<sy+block_w*2; y++)
2919 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
2921 for(y=sy; y<sy+block_w*2; y++)
2922 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
2924 if(sx+block_w*2 > w){
2925 for(y=sy; y<sy+block_w*2; y++)
2926 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
2930 // intra(black) = neighbors' contribution to the current block
2932 color[i]= get_dc(s, mb_x, mb_y, i);
2934 // get previous score (cannot be cached due to OBMC)
2935 if(pass > 0 && (block->type&BLOCK_INTRA)){
2936 int color0[3]= {block->color[0], block->color[1], block->color[2]};
2937 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
2939 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
2943 for(ref=0; ref < s->ref_frames; ref++){
2944 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
2945 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
2950 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
2951 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
2953 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
2955 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
2957 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
2959 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
2962 //FIXME avoid subpel interpolation / round to nearest integer
2965 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
2967 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
2968 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
2969 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
2970 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
2976 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
2979 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
2981 //FIXME or try the standard 2 pass qpel or similar
2983 mvr[0][0]= block->mx;
2984 mvr[0][1]= block->my;
2985 if(ref_rd > best_rd){
2992 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
2993 //FIXME RD style color selection
2994 if(!same_block(block, &backup)){
2995 if(tb ) tb ->type &= ~BLOCK_OPT;
2996 if(lb ) lb ->type &= ~BLOCK_OPT;
2997 if(rb ) rb ->type &= ~BLOCK_OPT;
2998 if(bb ) bb ->type &= ~BLOCK_OPT;
2999 if(tlb) tlb->type &= ~BLOCK_OPT;
3000 if(trb) trb->type &= ~BLOCK_OPT;
3001 if(blb) blb->type &= ~BLOCK_OPT;
3002 if(brb) brb->type &= ~BLOCK_OPT;
3007 av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3012 if(s->block_max_depth == 1){
3014 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3015 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3017 int best_rd, init_rd;
3018 const int index= mb_x + mb_y * b_stride;
3021 b[0]= &s->block[index];
3023 b[2]= b[0]+b_stride;
3025 if(same_block(b[0], b[1]) &&
3026 same_block(b[0], b[2]) &&
3027 same_block(b[0], b[3]))
3030 if(!s->me_cache_generation)
3031 memset(s->me_cache, 0, sizeof(s->me_cache));
3032 s->me_cache_generation += 1<<22;
3034 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3036 //FIXME more multiref search?
3037 check_4block_inter(s, mb_x, mb_y,
3038 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3039 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3042 if(!(b[i]->type&BLOCK_INTRA))
3043 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3045 if(init_rd != best_rd)
3049 av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3053 static void encode_blocks(SnowContext *s, int search){
3058 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
3062 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
3063 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
3067 if(s->avctx->me_method == ME_ITER || !search)
3068 encode_q_branch2(s, 0, x, y);
3070 encode_q_branch (s, 0, x, y);
3075 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3076 const int w= b->width;
3077 const int h= b->height;
3078 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3079 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3080 int x,y, thres1, thres2;
3082 if(s->qlog == LOSSLESS_QLOG){
3085 dst[x + y*stride]= src[x + y*stride];
3089 bias= bias ? 0 : (3*qmul)>>3;
3090 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3096 int i= src[x + y*stride];
3098 if((unsigned)(i+thres1) > thres2){
3101 i/= qmul; //FIXME optimize
3102 dst[x + y*stride]= i;
3106 i/= qmul; //FIXME optimize
3107 dst[x + y*stride]= -i;
3110 dst[x + y*stride]= 0;
3116 int i= src[x + y*stride];
3118 if((unsigned)(i+thres1) > thres2){
3121 i= (i + bias) / qmul; //FIXME optimize
3122 dst[x + y*stride]= i;
3126 i= (i + bias) / qmul; //FIXME optimize
3127 dst[x + y*stride]= -i;
3130 dst[x + y*stride]= 0;
3136 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3137 const int w= b->width;
3138 const int h= b->height;
3139 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3140 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3141 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3144 if(s->qlog == LOSSLESS_QLOG) return;
3148 int i= src[x + y*stride];
3150 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3152 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3158 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3159 const int w= b->width;
3160 const int h= b->height;
3163 for(y=h-1; y>=0; y--){
3164 for(x=w-1; x>=0; x--){
3165 int i= x + y*stride;
3169 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3170 else src[i] -= src[i - 1];
3172 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3173 else src[i] -= src[i - 1];
3176 if(y) src[i] -= src[i - stride];
3182 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3183 const int w= b->width;
3184 const int h= b->height;
3189 int i= x + y*stride;
3193 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3194 else src[i] += src[i - 1];
3196 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3197 else src[i] += src[i - 1];
3200 if(y) src[i] += src[i - stride];
3206 static void encode_qlogs(SnowContext *s){
3207 int plane_index, level, orientation;
3209 for(plane_index=0; plane_index<2; plane_index++){
3210 for(level=0; level<s->spatial_decomposition_count; level++){
3211 for(orientation=level ? 1:0; orientation<4; orientation++){
3212 if(orientation==2) continue;
3213 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3219 static void encode_header(SnowContext *s){
3223 memset(kstate, MID_STATE, sizeof(kstate));
3225 put_rac(&s->c, kstate, s->keyframe);
3226 if(s->keyframe || s->always_reset){
3228 s->last_spatial_decomposition_type=
3232 s->last_block_max_depth= 0;
3233 for(plane_index=0; plane_index<2; plane_index++){
3234 Plane *p= &s->plane[plane_index];
3237 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3241 put_symbol(&s->c, s->header_state, s->version, 0);
3242 put_rac(&s->c, s->header_state, s->always_reset);
3243 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3244 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3245 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3246 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3247 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3248 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3249 put_rac(&s->c, s->header_state, s->spatial_scalability);
3250 // put_rac(&s->c, s->header_state, s->rate_scalability);
3251 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3258 for(plane_index=0; plane_index<2; plane_index++){
3259 Plane *p= &s->plane[plane_index];
3260 update_mc |= p->last_htaps != p->htaps;
3261 update_mc |= p->last_diag_mc != p->diag_mc;
3262 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3264 put_rac(&s->c, s->header_state, update_mc);
3266 for(plane_index=0; plane_index<2; plane_index++){
3267 Plane *p= &s->plane[plane_index];
3268 put_rac(&s->c, s->header_state, p->diag_mc);
3269 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3270 for(i= p->htaps/2; i; i--)
3271 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3274 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3275 put_rac(&s->c, s->header_state, 1);
3276 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3279 put_rac(&s->c, s->header_state, 0);
3282 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3283 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3284 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3285 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3286 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3290 static void update_last_header_values(SnowContext *s){
3294 for(plane_index=0; plane_index<2; plane_index++){
3295 Plane *p= &s->plane[plane_index];
3296 p->last_diag_mc= p->diag_mc;
3297 p->last_htaps = p->htaps;
3298 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3302 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
3303 s->last_qlog = s->qlog;
3304 s->last_qbias = s->qbias;
3305 s->last_mv_scale = s->mv_scale;
3306 s->last_block_max_depth = s->block_max_depth;
3307 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3310 static int qscale2qlog(int qscale){
3311 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3312 + 61*QROOT/8; ///< 64 > 60
3315 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3317 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3318 * FIXME we know exact mv bits at this point,
3319 * but ratecontrol isn't set up to include them. */
3320 uint32_t coef_sum= 0;
3321 int level, orientation, delta_qlog;
3323 for(level=0; level<s->spatial_decomposition_count; level++){
3324 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3325 SubBand *b= &s->plane[0].band[level][orientation];
3326 IDWTELEM *buf= b->ibuf;
3327 const int w= b->width;
3328 const int h= b->height;
3329 const int stride= b->stride;
3330 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3331 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3332 const int qdiv= (1<<16)/qmul;
3334 //FIXME this is ugly
3337 buf[x+y*stride]= b->buf[x+y*stride];
3339 decorrelate(s, b, buf, stride, 1, 0);
3342 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3346 /* ugly, ratecontrol just takes a sqrt again */
3347 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3348 assert(coef_sum < INT_MAX);
3350 if(pict->pict_type == AV_PICTURE_TYPE_I){
3351 s->m.current_picture.mb_var_sum= coef_sum;
3352 s->m.current_picture.mc_mb_var_sum= 0;
3354 s->m.current_picture.mc_mb_var_sum= coef_sum;
3355 s->m.current_picture.mb_var_sum= 0;
3358 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3359 if (pict->quality < 0)
3361 s->lambda= pict->quality * 3/2;
3362 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3363 s->qlog+= delta_qlog;
3367 static void calculate_visual_weight(SnowContext *s, Plane *p){
3368 int width = p->width;
3369 int height= p->height;
3370 int level, orientation, x, y;
3372 for(level=0; level<s->spatial_decomposition_count; level++){
3373 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3374 SubBand *b= &p->band[level][orientation];
3375 IDWTELEM *ibuf= b->ibuf;
3378 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3379 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3380 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3381 for(y=0; y<height; y++){
3382 for(x=0; x<width; x++){
3383 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3388 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3393 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3394 SnowContext *s = avctx->priv_data;
3395 RangeCoder * const c= &s->c;
3396 AVFrame *pict = data;
3397 const int width= s->avctx->width;
3398 const int height= s->avctx->height;
3399 int level, orientation, plane_index, i, y;
3400 uint8_t rc_header_bak[sizeof(s->header_state)];
3401 uint8_t rc_block_bak[sizeof(s->block_state)];
3403 ff_init_range_encoder(c, buf, buf_size);
3404 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3408 for(y=0; y<(height>>shift); y++)
3409 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3410 &pict->data[i][y * pict->linesize[i]],
3413 s->new_picture = *pict;
3415 s->m.picture_number= avctx->frame_number;
3416 if(avctx->flags&CODEC_FLAG_PASS2){
3418 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3419 s->keyframe= pict->pict_type==AV_PICTURE_TYPE_I;
3420 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
3421 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3422 if (pict->quality < 0)
3426 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3428 pict->pict_type= s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3431 if(s->pass1_rc && avctx->frame_number == 0)
3432 pict->quality= 2*FF_QP2LAMBDA;
3434 s->qlog= qscale2qlog(pict->quality);
3435 s->lambda = pict->quality * 3/2;
3437 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
3438 s->qlog= LOSSLESS_QLOG;
3440 }//else keep previous frame's qlog until after motion estimation
3444 s->m.current_picture_ptr= &s->m.current_picture;
3445 s->m.last_picture.f.pts = s->m.current_picture.f.pts;
3446 s->m.current_picture.f.pts = pict->pts;
3447 if(pict->pict_type == AV_PICTURE_TYPE_P){
3448 int block_width = (width +15)>>4;
3449 int block_height= (height+15)>>4;
3450 int stride= s->current_picture.linesize[0];
3452 assert(s->current_picture.data[0]);
3453 assert(s->last_picture[0].data[0]);
3455 s->m.avctx= s->avctx;
3456 s->m.current_picture.f.data[0] = s->current_picture.data[0];
3457 s->m. last_picture.f.data[0] = s->last_picture[0].data[0];
3458 s->m. new_picture.f.data[0] = s-> input_picture.data[0];
3459 s->m. last_picture_ptr= &s->m. last_picture;
3461 s->m. last_picture.f.linesize[0] =
3462 s->m. new_picture.f.linesize[0] =
3463 s->m.current_picture.f.linesize[0] = stride;
3464 s->m.uvlinesize= s->current_picture.linesize[1];
3466 s->m.height= height;
3467 s->m.mb_width = block_width;
3468 s->m.mb_height= block_height;
3469 s->m.mb_stride= s->m.mb_width+1;
3470 s->m.b8_stride= 2*s->m.mb_width+1;
3472 s->m.pict_type= pict->pict_type;
3473 s->m.me_method= s->avctx->me_method;
3474 s->m.me.scene_change_score=0;
3475 s->m.flags= s->avctx->flags;
3476 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3477 s->m.out_format= FMT_H263;
3478 s->m.unrestricted_mv= 1;
3480 s->m.lambda = s->lambda;
3481 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3482 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3484 s->m.dsp= s->dsp; //move
3490 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
3491 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
3496 if(pict->pict_type == AV_PICTURE_TYPE_I)
3497 s->spatial_decomposition_count= 5;
3499 s->spatial_decomposition_count= 5;
3501 s->m.pict_type = pict->pict_type;
3502 s->qbias= pict->pict_type == AV_PICTURE_TYPE_P ? 2 : 0;
3504 common_init_after_header(avctx);
3506 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3507 for(plane_index=0; plane_index<3; plane_index++){
3508 calculate_visual_weight(s, &s->plane[plane_index]);
3513 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3514 encode_blocks(s, 1);
3515 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3517 for(plane_index=0; plane_index<3; plane_index++){
3518 Plane *p= &s->plane[plane_index];
3522 // int bits= put_bits_count(&s->c.pb);
3524 if (!s->memc_only) {
3526 if(pict->data[plane_index]) //FIXME gray hack
3529 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3532 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
3535 && pict->pict_type == AV_PICTURE_TYPE_P
3536 && !(avctx->flags&CODEC_FLAG_PASS2)
3537 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3538 ff_init_range_encoder(c, buf, buf_size);
3539 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3540 pict->pict_type= AV_PICTURE_TYPE_I;
3542 s->current_picture.key_frame=1;
3546 if(s->qlog == LOSSLESS_QLOG){
3549 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3555 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
3561 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
3563 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3565 if(s->pass1_rc && plane_index==0){
3566 int delta_qlog = ratecontrol_1pass(s, pict);
3567 if (delta_qlog <= INT_MIN)
3570 //reordering qlog in the bitstream would eliminate this reset
3571 ff_init_range_encoder(c, buf, buf_size);
3572 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
3573 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
3575 encode_blocks(s, 0);
3579 for(level=0; level<s->spatial_decomposition_count; level++){
3580 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3581 SubBand *b= &p->band[level][orientation];
3584 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
3586 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == AV_PICTURE_TYPE_P, 0);
3587 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
3588 assert(b->parent==NULL || b->parent->stride == b->stride*2);
3590 correlate(s, b, b->ibuf, b->stride, 1, 0);
3594 for(level=0; level<s->spatial_decomposition_count; level++){
3595 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3596 SubBand *b= &p->band[level][orientation];
3598 dequantize(s, b, b->ibuf, b->stride);
3602 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3603 if(s->qlog == LOSSLESS_QLOG){
3606 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
3610 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3613 if(pict->pict_type == AV_PICTURE_TYPE_I){
3616 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
3617 pict->data[plane_index][y*pict->linesize[plane_index] + x];
3621 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
3622 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3625 if(s->avctx->flags&CODEC_FLAG_PSNR){
3628 if(pict->data[plane_index]) //FIXME gray hack
3631 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3635 s->avctx->error[plane_index] += error;
3636 s->current_picture.error[plane_index] = error;
3641 update_last_header_values(s);
3643 release_buffer(avctx);
3645 s->current_picture.coded_picture_number = avctx->frame_number;
3646 s->current_picture.pict_type = pict->pict_type;
3647 s->current_picture.quality = pict->quality;
3648 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3649 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
3650 s->m.current_picture.f.display_picture_number =
3651 s->m.current_picture.f.coded_picture_number = avctx->frame_number;
3652 s->m.current_picture.f.quality = pict->quality;
3653 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3655 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
3657 if(avctx->flags&CODEC_FLAG_PASS1)
3658 ff_write_pass1_stats(&s->m);
3659 s->m.last_pict_type = s->m.pict_type;
3660 avctx->frame_bits = s->m.frame_bits;
3661 avctx->mv_bits = s->m.mv_bits;
3662 avctx->misc_bits = s->m.misc_bits;
3663 avctx->p_tex_bits = s->m.p_tex_bits;
3667 return ff_rac_terminate(c);
3670 static av_cold int encode_end(AVCodecContext *avctx)
3672 SnowContext *s = avctx->priv_data;
3675 if (s->input_picture.data[0])
3676 avctx->release_buffer(avctx, &s->input_picture);
3677 av_free(avctx->stats_out);
3682 #define OFFSET(x) offsetof(SnowContext, x)
3683 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
3684 static const AVOption options[] = {
3685 { "memc_only", "Only do ME/MC (I frames -> ref, P frame -> ME+MC).", OFFSET(memc_only), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
3689 static const AVClass snowenc_class = {
3690 .class_name = "snow encoder",
3691 .item_name = av_default_item_name,
3693 .version = LIBAVUTIL_VERSION_INT,
3696 AVCodec ff_snow_encoder = {
3698 .type = AVMEDIA_TYPE_VIDEO,
3699 .id = CODEC_ID_SNOW,
3700 .priv_data_size = sizeof(SnowContext),
3701 .init = encode_init,
3702 .encode = encode_frame,
3703 .close = encode_end,
3704 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
3705 .priv_class = &snowenc_class,
3715 #include "libavutil/lfg.h"
3716 #include "libavutil/mathematics.h"
3721 int buffer[2][width*height];
3725 s.spatial_decomposition_count=6;
3726 s.spatial_decomposition_type=1;
3728 av_lfg_init(&prng, 1);
3730 printf("testing 5/3 DWT\n");
3731 for(i=0; i<width*height; i++)
3732 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
3734 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3735 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3737 for(i=0; i<width*height; i++)
3738 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
3740 printf("testing 9/7 DWT\n");
3741 s.spatial_decomposition_type=0;
3742 for(i=0; i<width*height; i++)
3743 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
3745 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3746 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3748 for(i=0; i<width*height; i++)
3749 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
3752 int level, orientation, x, y;
3753 int64_t errors[8][4];
3756 memset(errors, 0, sizeof(errors));
3757 s.spatial_decomposition_count=3;
3758 s.spatial_decomposition_type=0;
3759 for(level=0; level<s.spatial_decomposition_count; level++){
3760 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3761 int w= width >> (s.spatial_decomposition_count-level);
3762 int h= height >> (s.spatial_decomposition_count-level);
3763 int stride= width << (s.spatial_decomposition_count-level);
3764 DWTELEM *buf= buffer[0];
3767 if(orientation&1) buf+=w;
3768 if(orientation>1) buf+=stride>>1;
3770 memset(buffer[0], 0, sizeof(int)*width*height);
3771 buf[w/2 + h/2*stride]= 256*256;
3772 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3773 for(y=0; y<height; y++){
3774 for(x=0; x<width; x++){
3775 int64_t d= buffer[0][x + y*width];
3777 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
3779 if(FFABS(height/2-y)<9 && level==2) printf("\n");
3781 error= (int)(sqrt(error)+0.5);
3782 errors[level][orientation]= error;
3783 if(g) g=av_gcd(g, error);
3787 printf("static int const visual_weight[][4]={\n");
3788 for(level=0; level<s.spatial_decomposition_count; level++){
3790 for(orientation=0; orientation<4; orientation++){
3791 printf("%8"PRId64",", errors[level][orientation]/g);
3798 int w= width >> (s.spatial_decomposition_count-level);
3799 //int h= height >> (s.spatial_decomposition_count-level);
3800 int stride= width << (s.spatial_decomposition_count-level);
3801 DWTELEM *buf= buffer[0];
3807 memset(buffer[0], 0, sizeof(int)*width*height);
3808 for(y=0; y<height; y++){
3809 for(x=0; x<width; x++){
3810 int tab[4]={0,2,3,1};
3811 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3814 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3815 for(y=0; y<height; y++){
3816 for(x=0; x<width; x++){
3817 int64_t d= buffer[0][x + y*width];
3819 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
3821 if(FFABS(height/2-y)<9) printf("\n");