2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/intmath.h"
27 #include "rangecoder.h"
30 #include "mpegvideo.h"
36 static const int8_t quant3bA[256]={
37 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
38 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
39 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
40 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
41 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
42 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
43 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
44 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
45 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
46 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
47 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
48 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
49 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
50 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
51 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
52 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
55 static const uint8_t obmc32[1024]={
56 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
57 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
58 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
59 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
60 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
61 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
62 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
63 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
64 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
65 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
66 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
67 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
68 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
69 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
70 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
71 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
72 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
73 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
74 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
75 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
76 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
77 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
78 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
79 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
80 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
81 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
82 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
83 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
84 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
85 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
86 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
87 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
90 static const uint8_t obmc16[256]={
91 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
92 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
93 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
94 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
95 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
96 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
97 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
98 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
99 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
100 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
101 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
102 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
103 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
104 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
105 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
106 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
111 static const uint8_t obmc8[64]={
112 4, 12, 20, 28, 28, 20, 12, 4,
113 12, 36, 60, 84, 84, 60, 36, 12,
114 20, 60,100,140,140,100, 60, 20,
115 28, 84,140,196,196,140, 84, 28,
116 28, 84,140,196,196,140, 84, 28,
117 20, 60,100,140,140,100, 60, 20,
118 12, 36, 60, 84, 84, 60, 36, 12,
119 4, 12, 20, 28, 28, 20, 12, 4,
124 static const uint8_t obmc4[16]={
132 static const uint8_t * const obmc_tab[4]={
133 obmc32, obmc16, obmc8, obmc4
136 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
138 typedef struct BlockNode{
144 //#define TYPE_SPLIT 1
145 #define BLOCK_INTRA 1
147 //#define TYPE_NOCOLOR 4
148 uint8_t level; //FIXME merge into type?
151 static const BlockNode null_block= { //FIXME add border maybe
152 .color= {128,128,128},
160 #define LOG2_MB_SIZE 4
161 #define MB_SIZE (1<<LOG2_MB_SIZE)
162 #define ENCODER_EXTRA_BITS 4
165 typedef struct x_and_coeff{
170 typedef struct SubBand{
175 int qlog; ///< log(qscale)/log[2^(1/6)]
180 int stride_line; ///< Stride measured in lines, not pixels.
181 x_and_coeff * x_coeff;
182 struct SubBand *parent;
183 uint8_t state[/*7*2*/ 7 + 512][32];
186 typedef struct Plane{
189 SubBand band[MAX_DECOMPOSITIONS][4];
192 int8_t hcoeff[HTAPS_MAX/2];
197 int8_t last_hcoeff[HTAPS_MAX/2];
201 typedef struct SnowContext{
203 AVCodecContext *avctx;
208 AVFrame input_picture; ///< new_picture with the internal linesizes
209 AVFrame current_picture;
210 AVFrame last_picture[MAX_REF_FRAMES];
211 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
212 AVFrame mconly_picture;
213 // uint8_t q_context[16];
214 uint8_t header_state[32];
215 uint8_t block_state[128 + 32*128];
219 int spatial_decomposition_type;
220 int last_spatial_decomposition_type;
221 int temporal_decomposition_type;
222 int spatial_decomposition_count;
223 int last_spatial_decomposition_count;
224 int temporal_decomposition_count;
227 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
228 uint32_t *ref_scores[MAX_REF_FRAMES];
229 DWTELEM *spatial_dwt_buffer;
230 IDWTELEM *spatial_idwt_buffer;
234 int spatial_scalability;
244 #define QBIAS_SHIFT 3
248 int last_block_max_depth;
249 Plane plane[MAX_PLANES];
251 #define ME_CACHE_SIZE 1024
252 int me_cache[ME_CACHE_SIZE];
253 int me_cache_generation;
256 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
262 // Avoid a name clash on SGI IRIX
265 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
266 static uint8_t qexp[QROOT];
268 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
272 const int a= FFABS(v);
273 const int e= av_log2(a);
274 const int el= FFMIN(e, 10);
275 put_rac(c, state+0, 0);
278 put_rac(c, state+1+i, 1); //1..10
281 put_rac(c, state+1+9, 1); //1..10
283 put_rac(c, state+1+FFMIN(i,9), 0);
285 for(i=e-1; i>=el; i--){
286 put_rac(c, state+22+9, (a>>i)&1); //22..31
289 put_rac(c, state+22+i, (a>>i)&1); //22..31
293 put_rac(c, state+11 + el, v < 0); //11..21
295 put_rac(c, state+0, 1);
299 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
300 if(get_rac(c, state+0))
305 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
310 for(i=e-1; i>=0; i--){
311 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
314 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
319 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
321 int r= log2>=0 ? 1<<log2 : 1;
327 put_rac(c, state+4+log2, 1);
332 put_rac(c, state+4+log2, 0);
334 for(i=log2-1; i>=0; i--){
335 put_rac(c, state+31-i, (v>>i)&1);
339 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
341 int r= log2>=0 ? 1<<log2 : 1;
346 while(get_rac(c, state+4+log2)){
352 for(i=log2-1; i>=0; i--){
353 v+= get_rac(c, state+31-i)<<i;
359 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
360 const int w= b->width;
361 const int h= b->height;
365 x_and_coeff *xc= b->x_coeff;
366 x_and_coeff *prev_xc= NULL;
367 x_and_coeff *prev2_xc= xc;
368 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
369 x_and_coeff *prev_parent_xc= parent_xc;
371 runs= get_symbol2(&s->c, b->state[30], 0);
372 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
379 if(y && prev_xc->x == 0){
391 if(prev_xc->x == x + 1)
397 if(x>>1 > parent_xc->x){
400 if(x>>1 == parent_xc->x){
404 if(/*ll|*/l|lt|t|rt|p){
405 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
407 v=get_rac(&s->c, &b->state[0][context]);
409 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
410 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
417 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
419 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
420 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
429 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
430 else max_run= FFMIN(run, w-x-1);
432 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
438 (xc++)->x= w+1; //end marker
444 while(parent_xc->x != parent->width+1)
447 prev_parent_xc= parent_xc;
449 parent_xc= prev_parent_xc;
454 (xc++)->x= w+1; //end marker
457 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
458 const int w= b->width;
460 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
461 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
462 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
465 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
470 /* If we are on the second or later slice, restore our index. */
472 new_index = save_state[0];
475 for(y=start_y; y<h; y++){
478 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
479 memset(line, 0, b->width*sizeof(IDWTELEM));
480 v = b->x_coeff[new_index].coeff;
481 x = b->x_coeff[new_index++].x;
483 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
484 register int u= -(v&1);
487 v = b->x_coeff[new_index].coeff;
488 x = b->x_coeff[new_index++].x;
492 /* Save our variables for the next slice. */
493 save_state[0] = new_index;
498 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
499 int plane_index, level, orientation;
501 for(plane_index=0; plane_index<3; plane_index++){
502 for(level=0; level<MAX_DECOMPOSITIONS; level++){
503 for(orientation=level ? 1:0; orientation<4; orientation++){
504 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
508 memset(s->header_state, MID_STATE, sizeof(s->header_state));
509 memset(s->block_state, MID_STATE, sizeof(s->block_state));
512 static int alloc_blocks(SnowContext *s){
513 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
514 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
520 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
524 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
525 const int w= s->b_width << s->block_max_depth;
526 const int rem_depth= s->block_max_depth - level;
527 const int index= (x + y*w) << rem_depth;
528 const int block_w= 1<<rem_depth;
541 for(j=0; j<block_w; j++){
542 for(i=0; i<block_w; i++){
543 s->block[index + i + j*w]= block;
548 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
549 const int offset[3]= {
551 ((y*c->uvstride + x)>>1),
552 ((y*c->uvstride + x)>>1),
556 c->src[0][i]= src [i];
557 c->ref[0][i]= ref [i] + offset[i];
562 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
563 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
564 if(s->ref_frames == 1){
565 *mx = mid_pred(left->mx, top->mx, tr->mx);
566 *my = mid_pred(left->my, top->my, tr->my);
568 const int *scale = scale_mv_ref[ref];
569 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
570 (top ->mx * scale[top ->ref] + 128) >>8,
571 (tr ->mx * scale[tr ->ref] + 128) >>8);
572 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
573 (top ->my * scale[top ->ref] + 128) >>8,
574 (tr ->my * scale[tr ->ref] + 128) >>8);
578 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
579 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
580 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
582 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
586 static void decode_q_branch(SnowContext *s, int level, int x, int y){
587 const int w= s->b_width << s->block_max_depth;
588 const int rem_depth= s->block_max_depth - level;
589 const int index= (x + y*w) << rem_depth;
590 int trx= (x+1)<<rem_depth;
591 const BlockNode *left = x ? &s->block[index-1] : &null_block;
592 const BlockNode *top = y ? &s->block[index-w] : &null_block;
593 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
594 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
595 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
598 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
602 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
604 int l = left->color[0];
605 int cb= left->color[1];
606 int cr= left->color[2];
608 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
609 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
610 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
612 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
615 pred_mv(s, &mx, &my, 0, left, top, tr);
616 l += get_symbol(&s->c, &s->block_state[32], 1);
617 cb+= get_symbol(&s->c, &s->block_state[64], 1);
618 cr+= get_symbol(&s->c, &s->block_state[96], 1);
620 if(s->ref_frames > 1)
621 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
622 pred_mv(s, &mx, &my, ref, left, top, tr);
623 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
624 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
626 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
628 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
629 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
630 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
631 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
635 static void decode_blocks(SnowContext *s){
642 decode_q_branch(s, 0, x, y);
647 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
648 static const uint8_t weight[64]={
659 static const uint8_t brane[256]={
660 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
661 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
662 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
663 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
664 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
665 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
666 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
667 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
668 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
669 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
670 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
671 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
672 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
673 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
674 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
675 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
678 static const uint8_t needs[16]={
686 int16_t tmpIt [64*(32+HTAPS_MAX)];
687 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
688 int16_t *tmpI= tmpIt;
689 uint8_t *tmp2= tmp2t[0];
690 const uint8_t *hpel[11];
691 assert(dx<16 && dy<16);
692 r= brane[dx + 16*dy]&15;
693 l= brane[dx + 16*dy]>>4;
695 b= needs[l] | needs[r];
700 for(y=0; y < b_h+HTAPS_MAX-1; y++){
701 for(x=0; x < b_w; x++){
702 int a_1=src[x + HTAPS_MAX/2-4];
703 int a0= src[x + HTAPS_MAX/2-3];
704 int a1= src[x + HTAPS_MAX/2-2];
705 int a2= src[x + HTAPS_MAX/2-1];
706 int a3= src[x + HTAPS_MAX/2+0];
707 int a4= src[x + HTAPS_MAX/2+1];
708 int a5= src[x + HTAPS_MAX/2+2];
709 int a6= src[x + HTAPS_MAX/2+3];
711 if(!p || p->fast_mc){
712 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
716 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
721 if(am&(~255)) am= ~(am>>31);
730 src += HTAPS_MAX/2 - 1;
734 for(y=0; y < b_h; y++){
735 for(x=0; x < b_w+1; x++){
736 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
737 int a0= src[x + (HTAPS_MAX/2-3)*stride];
738 int a1= src[x + (HTAPS_MAX/2-2)*stride];
739 int a2= src[x + (HTAPS_MAX/2-1)*stride];
740 int a3= src[x + (HTAPS_MAX/2+0)*stride];
741 int a4= src[x + (HTAPS_MAX/2+1)*stride];
742 int a5= src[x + (HTAPS_MAX/2+2)*stride];
743 int a6= src[x + (HTAPS_MAX/2+3)*stride];
746 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
748 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
750 if(am&(~255)) am= ~(am>>31);
758 src += stride*(HTAPS_MAX/2 - 1);
762 for(y=0; y < b_h; y++){
763 for(x=0; x < b_w; x++){
764 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
765 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
766 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
767 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
768 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
769 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
770 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
771 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
774 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
776 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
777 if(am&(~255)) am= ~(am>>31);
786 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
791 hpel[ 6]= tmp2t[1] + 1;
793 hpel[ 8]= src + stride;
794 hpel[ 9]= hpel[1] + stride;
795 hpel[10]= hpel[8] + 1;
798 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
799 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
800 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
801 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
804 for(y=0; y < b_h; y++){
805 for(x=0; x < b_w; x++){
806 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
807 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
816 const uint8_t *src1= hpel[l];
817 const uint8_t *src2= hpel[r];
818 int a= weight[((dx&7) + (8*(dy&7)))];
820 for(y=0; y < b_h; y++){
821 for(x=0; x < b_w; x++){
822 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
831 #define mca(dx,dy,b_w)\
832 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
834 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
846 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
847 if(block->type & BLOCK_INTRA){
849 const int color = block->color[plane_index];
850 const int color4= color*0x01010101;
852 for(y=0; y < b_h; y++){
853 *(uint32_t*)&dst[0 + y*stride]= color4;
854 *(uint32_t*)&dst[4 + y*stride]= color4;
855 *(uint32_t*)&dst[8 + y*stride]= color4;
856 *(uint32_t*)&dst[12+ y*stride]= color4;
857 *(uint32_t*)&dst[16+ y*stride]= color4;
858 *(uint32_t*)&dst[20+ y*stride]= color4;
859 *(uint32_t*)&dst[24+ y*stride]= color4;
860 *(uint32_t*)&dst[28+ y*stride]= color4;
863 for(y=0; y < b_h; y++){
864 *(uint32_t*)&dst[0 + y*stride]= color4;
865 *(uint32_t*)&dst[4 + y*stride]= color4;
866 *(uint32_t*)&dst[8 + y*stride]= color4;
867 *(uint32_t*)&dst[12+ y*stride]= color4;
870 for(y=0; y < b_h; y++){
871 *(uint32_t*)&dst[0 + y*stride]= color4;
872 *(uint32_t*)&dst[4 + y*stride]= color4;
875 for(y=0; y < b_h; y++){
876 *(uint32_t*)&dst[0 + y*stride]= color4;
879 for(y=0; y < b_h; y++){
880 for(x=0; x < b_w; x++){
881 dst[x + y*stride]= color;
886 uint8_t *src= s->last_picture[block->ref].data[plane_index];
887 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
888 int mx= block->mx*scale;
889 int my= block->my*scale;
892 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
893 sx += (mx>>4) - (HTAPS_MAX/2-1);
894 sy += (my>>4) - (HTAPS_MAX/2-1);
895 src += sx + sy*stride;
896 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
897 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
898 s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
901 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
902 // assert(!(b_w&(b_w-1)));
903 assert(b_w>1 && b_h>1);
904 assert((tab_index>=0 && tab_index<4) || b_w==32);
905 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
906 mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
909 for(y=0; y<b_h; y+=16){
910 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
911 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
914 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
916 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
917 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
920 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
921 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
926 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
927 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
930 for(y=0; y<b_h; y++){
931 //FIXME ugly misuse of obmc_stride
932 const uint8_t *obmc1= obmc + y*obmc_stride;
933 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
934 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
935 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
936 dst = slice_buffer_get_line(sb, src_y + y);
937 for(x=0; x<b_w; x++){
938 int v= obmc1[x] * block[3][x + y*src_stride]
939 +obmc2[x] * block[2][x + y*src_stride]
940 +obmc3[x] * block[1][x + y*src_stride]
941 +obmc4[x] * block[0][x + y*src_stride];
943 v <<= 8 - LOG2_OBMC_MAX;
949 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
950 if(v&(~255)) v= ~(v>>31);
951 dst8[x + y*src_stride] = v;
959 //FIXME name cleanup (b_w, block_w, b_width stuff)
960 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
961 const int b_width = s->b_width << s->block_max_depth;
962 const int b_height= s->b_height << s->block_max_depth;
963 const int b_stride= b_width;
964 BlockNode *lt= &s->block[b_x + b_y*b_stride];
966 BlockNode *lb= lt+b_stride;
969 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
970 uint8_t *tmp = s->scratchbuf;
977 }else if(b_x + 1 >= b_width){
984 }else if(b_y + 1 >= b_height){
989 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
992 if(!sliced && !offset_dst)
995 }else if(src_x + b_w > w){
999 obmc -= src_y*obmc_stride;
1001 if(!sliced && !offset_dst)
1002 dst -= src_y*dst_stride;
1004 }else if(src_y + b_h> h){
1008 if(b_w<=0 || b_h<=0) return;
1010 assert(src_stride > 2*MB_SIZE + 5);
1012 if(!sliced && offset_dst)
1013 dst += src_x + src_y*dst_stride;
1014 dst8+= src_x + src_y*src_stride;
1015 // src += src_x + src_y*src_stride;
1017 ptmp= tmp + 3*tmp_step;
1020 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
1022 if(same_block(lt, rt)){
1027 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1030 if(same_block(lt, lb)){
1032 }else if(same_block(rt, lb)){
1037 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1040 if(same_block(lt, rb) ){
1042 }else if(same_block(rt, rb)){
1044 }else if(same_block(lb, rb)){
1048 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
1051 s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
1053 for(y=0; y<b_h; y++){
1054 //FIXME ugly misuse of obmc_stride
1055 const uint8_t *obmc1= obmc + y*obmc_stride;
1056 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1057 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1058 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1059 for(x=0; x<b_w; x++){
1060 int v= obmc1[x] * block[3][x + y*src_stride]
1061 +obmc2[x] * block[2][x + y*src_stride]
1062 +obmc3[x] * block[1][x + y*src_stride]
1063 +obmc4[x] * block[0][x + y*src_stride];
1065 v <<= 8 - LOG2_OBMC_MAX;
1067 v >>= 8 - FRAC_BITS;
1070 v += dst[x + y*dst_stride];
1071 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1072 if(v&(~255)) v= ~(v>>31);
1073 dst8[x + y*src_stride] = v;
1075 dst[x + y*dst_stride] -= v;
1082 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
1083 Plane *p= &s->plane[plane_index];
1084 const int mb_w= s->b_width << s->block_max_depth;
1085 const int mb_h= s->b_height << s->block_max_depth;
1087 int block_size = MB_SIZE >> s->block_max_depth;
1088 int block_w = plane_index ? block_size/2 : block_size;
1089 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1090 int obmc_stride= plane_index ? block_size : 2*block_size;
1091 int ref_stride= s->current_picture.linesize[plane_index];
1092 uint8_t *dst8= s->current_picture.data[plane_index];
1096 if(s->keyframe || (s->avctx->debug&512)){
1101 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1102 // DWTELEM * line = slice_buffer_get_line(sb, y);
1103 IDWTELEM * line = sb->line[y];
1105 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1106 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1108 if(v&(~255)) v= ~(v>>31);
1109 dst8[x + y*ref_stride]= v;
1113 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1114 // DWTELEM * line = slice_buffer_get_line(sb, y);
1115 IDWTELEM * line = sb->line[y];
1117 line[x] -= 128 << FRAC_BITS;
1118 // buf[x + y*w]-= 128<<FRAC_BITS;
1126 for(mb_x=0; mb_x<=mb_w; mb_x++){
1127 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
1128 block_w*mb_x - block_w/2,
1129 block_w*mb_y - block_w/2,
1132 w, ref_stride, obmc_stride,
1134 add, 0, plane_index);
1138 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
1139 Plane *p= &s->plane[plane_index];
1140 const int mb_w= s->b_width << s->block_max_depth;
1141 const int mb_h= s->b_height << s->block_max_depth;
1143 int block_size = MB_SIZE >> s->block_max_depth;
1144 int block_w = plane_index ? block_size/2 : block_size;
1145 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1146 const int obmc_stride= plane_index ? block_size : 2*block_size;
1147 int ref_stride= s->current_picture.linesize[plane_index];
1148 uint8_t *dst8= s->current_picture.data[plane_index];
1152 if(s->keyframe || (s->avctx->debug&512)){
1157 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1159 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1161 if(v&(~255)) v= ~(v>>31);
1162 dst8[x + y*ref_stride]= v;
1166 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1168 buf[x + y*w]-= 128<<FRAC_BITS;
1176 for(mb_x=0; mb_x<=mb_w; mb_x++){
1177 add_yblock(s, 0, NULL, buf, dst8, obmc,
1178 block_w*mb_x - block_w/2,
1179 block_w*mb_y - block_w/2,
1182 w, ref_stride, obmc_stride,
1184 add, 1, plane_index);
1188 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
1189 const int mb_h= s->b_height << s->block_max_depth;
1191 for(mb_y=0; mb_y<=mb_h; mb_y++)
1192 predict_slice(s, buf, plane_index, add, mb_y);
1195 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
1196 const int w= b->width;
1197 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1198 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1199 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1202 if(s->qlog == LOSSLESS_QLOG) return;
1204 for(y=start_y; y<end_y; y++){
1205 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1206 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1210 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1212 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
1218 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
1219 const int w= b->width;
1222 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
1226 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1228 for(y=start_y; y<end_y; y++){
1230 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1231 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1235 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
1236 else line[x] += line[x - 1];
1238 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
1239 else line[x] += line[x - 1];
1242 if(y) line[x] += prev[x];
1248 static void decode_qlogs(SnowContext *s){
1249 int plane_index, level, orientation;
1251 for(plane_index=0; plane_index<3; plane_index++){
1252 for(level=0; level<s->spatial_decomposition_count; level++){
1253 for(orientation=level ? 1:0; orientation<4; orientation++){
1255 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1256 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1257 else q= get_symbol(&s->c, s->header_state, 1);
1258 s->plane[plane_index].band[level][orientation].qlog= q;
1264 #define GET_S(dst, check) \
1265 tmp= get_symbol(&s->c, s->header_state, 0);\
1267 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
1272 static int decode_header(SnowContext *s){
1273 int plane_index, tmp;
1276 memset(kstate, MID_STATE, sizeof(kstate));
1278 s->keyframe= get_rac(&s->c, kstate);
1279 if(s->keyframe || s->always_reset){
1281 s->spatial_decomposition_type=
1285 s->block_max_depth= 0;
1288 GET_S(s->version, tmp <= 0U)
1289 s->always_reset= get_rac(&s->c, s->header_state);
1290 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1291 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1292 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1293 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1294 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1295 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
1296 s->spatial_scalability= get_rac(&s->c, s->header_state);
1297 // s->rate_scalability= get_rac(&s->c, s->header_state);
1298 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
1299 s->max_ref_frames++;
1305 if(get_rac(&s->c, s->header_state)){
1306 for(plane_index=0; plane_index<2; plane_index++){
1307 int htaps, i, sum=0;
1308 Plane *p= &s->plane[plane_index];
1309 p->diag_mc= get_rac(&s->c, s->header_state);
1310 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
1311 if((unsigned)htaps > HTAPS_MAX || htaps==0)
1314 for(i= htaps/2; i; i--){
1315 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
1316 sum += p->hcoeff[i];
1318 p->hcoeff[0]= 32-sum;
1320 s->plane[2].diag_mc= s->plane[1].diag_mc;
1321 s->plane[2].htaps = s->plane[1].htaps;
1322 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
1324 if(get_rac(&s->c, s->header_state)){
1325 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1330 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
1331 if(s->spatial_decomposition_type > 1U){
1332 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
1335 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
1336 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
1337 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
1341 s->qlog += get_symbol(&s->c, s->header_state, 1);
1342 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
1343 s->qbias += get_symbol(&s->c, s->header_state, 1);
1344 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
1345 if(s->block_max_depth > 1 || s->block_max_depth < 0){
1346 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
1347 s->block_max_depth= 0;
1354 static void init_qexp(void){
1358 for(i=0; i<QROOT; i++){
1360 v *= pow(2, 1.0 / QROOT);
1364 static av_cold int common_init(AVCodecContext *avctx){
1365 SnowContext *s = avctx->priv_data;
1370 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
1372 dsputil_init(&s->dsp, avctx);
1373 ff_dwt_init(&s->dwt);
1376 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
1377 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
1378 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
1379 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
1380 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
1381 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
1400 #define mcfh(dx,dy)\
1401 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
1402 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
1403 mc_block_hpel ## dx ## dy ## 16;\
1404 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
1405 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
1406 mc_block_hpel ## dx ## dy ## 8;
1416 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
1418 width= s->avctx->width;
1419 height= s->avctx->height;
1421 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
1422 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
1424 for(i=0; i<MAX_REF_FRAMES; i++)
1425 for(j=0; j<MAX_REF_FRAMES; j++)
1426 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
1428 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
1429 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
1434 static int common_init_after_header(AVCodecContext *avctx){
1435 SnowContext *s = avctx->priv_data;
1436 int plane_index, level, orientation;
1438 for(plane_index=0; plane_index<3; plane_index++){
1439 int w= s->avctx->width;
1440 int h= s->avctx->height;
1443 w>>= s->chroma_h_shift;
1444 h>>= s->chroma_v_shift;
1446 s->plane[plane_index].width = w;
1447 s->plane[plane_index].height= h;
1449 for(level=s->spatial_decomposition_count-1; level>=0; level--){
1450 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1451 SubBand *b= &s->plane[plane_index].band[level][orientation];
1453 b->buf= s->spatial_dwt_buffer;
1455 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
1456 b->width = (w + !(orientation&1))>>1;
1457 b->height= (h + !(orientation>1))>>1;
1459 b->stride_line = 1 << (s->spatial_decomposition_count - level);
1460 b->buf_x_offset = 0;
1461 b->buf_y_offset = 0;
1465 b->buf_x_offset = (w+1)>>1;
1468 b->buf += b->stride>>1;
1469 b->buf_y_offset = b->stride_line >> 1;
1471 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
1474 b->parent= &s->plane[plane_index].band[level-1][orientation];
1475 //FIXME avoid this realloc
1476 av_freep(&b->x_coeff);
1477 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
1492 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
1493 SubBand *b= &p->band[level][orientation];
1497 int step= 1 << (s->spatial_decomposition_count - level);
1504 //FIXME bias for nonzero ?
1506 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
1507 for(y=0; y<p->height; y++){
1508 for(x=0; x<p->width; x++){
1509 int sx= (x-xo + step/2) / step / Q2_STEP;
1510 int sy= (y-yo + step/2) / step / Q2_STEP;
1511 int v= r0[x + y*p->width] - r1[x + y*p->width];
1512 assert(sx>=0 && sy>=0 && sx < score_stride);
1514 score[sx + sy*score_stride] += v*v;
1515 assert(score[sx + sy*score_stride] >= 0);
1520 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
1521 int level, orientation;
1523 for(level=0; level<s->spatial_decomposition_count; level++){
1524 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1525 SubBand *b= &p->band[level][orientation];
1526 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
1528 dequantize(s, b, dst, b->stride);
1533 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
1534 int level, orientation, ys, xs, x, y, pass;
1535 IDWTELEM best_dequant[height * stride];
1536 IDWTELEM idwt2_buffer[height * stride];
1537 const int score_stride= (width + 10)/Q2_STEP;
1538 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1539 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1540 int threshold= (s->m.lambda * s->m.lambda) >> 6;
1542 //FIXME pass the copy cleanly ?
1544 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
1545 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
1547 for(level=0; level<s->spatial_decomposition_count; level++){
1548 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1549 SubBand *b= &p->band[level][orientation];
1550 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1551 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
1552 assert(src == b->buf); // code does not depend on this but it is true currently
1554 quantize(s, b, dst, src, b->stride, s->qbias);
1557 for(pass=0; pass<1; pass++){
1558 if(s->qbias == 0) //keyframe
1560 for(level=0; level<s->spatial_decomposition_count; level++){
1561 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1562 SubBand *b= &p->band[level][orientation];
1563 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
1564 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1566 for(ys= 0; ys<Q2_STEP; ys++){
1567 for(xs= 0; xs<Q2_STEP; xs++){
1568 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1569 dequantize_all(s, p, idwt2_buffer, width, height);
1570 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1571 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1572 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1573 for(y=ys; y<b->height; y+= Q2_STEP){
1574 for(x=xs; x<b->width; x+= Q2_STEP){
1575 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
1576 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
1577 //FIXME try more than just --
1580 dequantize_all(s, p, idwt2_buffer, width, height);
1581 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1582 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1583 for(y=ys; y<b->height; y+= Q2_STEP){
1584 for(x=xs; x<b->width; x+= Q2_STEP){
1585 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
1586 if(score[score_idx] <= best_score[score_idx] + threshold){
1587 best_score[score_idx]= score[score_idx];
1588 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
1589 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
1590 //FIXME copy instead
1599 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
1602 #endif /* QUANTIZE2==1 */
1604 #define USE_HALFPEL_PLANE 0
1606 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
1609 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
1613 int w= s->avctx->width >>is_chroma;
1614 int h= s->avctx->height >>is_chroma;
1615 int ls= frame->linesize[p];
1616 uint8_t *src= frame->data[p];
1618 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1619 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1620 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1627 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
1634 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1642 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1650 static void release_buffer(AVCodecContext *avctx){
1651 SnowContext *s = avctx->priv_data;
1654 if(s->last_picture[s->max_ref_frames-1].data[0]){
1655 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
1657 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
1658 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
1662 static int frame_start(SnowContext *s){
1664 int w= s->avctx->width; //FIXME round up to x16 ?
1665 int h= s->avctx->height;
1667 if(s->current_picture.data[0]){
1668 s->dsp.draw_edges(s->current_picture.data[0],
1669 s->current_picture.linesize[0], w , h ,
1670 EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM);
1671 s->dsp.draw_edges(s->current_picture.data[1],
1672 s->current_picture.linesize[1], w>>1, h>>1,
1673 EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
1674 s->dsp.draw_edges(s->current_picture.data[2],
1675 s->current_picture.linesize[2], w>>1, h>>1,
1676 EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
1679 release_buffer(s->avctx);
1681 tmp= s->last_picture[s->max_ref_frames-1];
1682 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
1683 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
1684 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
1685 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
1686 s->last_picture[0]= s->current_picture;
1687 s->current_picture= tmp;
1693 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
1694 if(i && s->last_picture[i-1].key_frame)
1697 if(s->ref_frames==0){
1698 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
1703 s->current_picture.reference= 1;
1704 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
1705 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
1709 s->current_picture.key_frame= s->keyframe;
1714 static av_cold void common_end(SnowContext *s){
1715 int plane_index, level, orientation, i;
1717 av_freep(&s->spatial_dwt_buffer);
1718 av_freep(&s->spatial_idwt_buffer);
1721 av_freep(&s->m.me.scratchpad);
1722 av_freep(&s->m.me.map);
1723 av_freep(&s->m.me.score_map);
1724 av_freep(&s->m.obmc_scratchpad);
1726 av_freep(&s->block);
1727 av_freep(&s->scratchbuf);
1729 for(i=0; i<MAX_REF_FRAMES; i++){
1730 av_freep(&s->ref_mvs[i]);
1731 av_freep(&s->ref_scores[i]);
1732 if(s->last_picture[i].data[0])
1733 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
1736 for(plane_index=0; plane_index<3; plane_index++){
1737 for(level=s->spatial_decomposition_count-1; level>=0; level--){
1738 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1739 SubBand *b= &s->plane[plane_index].band[level][orientation];
1741 av_freep(&b->x_coeff);
1745 if (s->mconly_picture.data[0])
1746 s->avctx->release_buffer(s->avctx, &s->mconly_picture);
1747 if (s->current_picture.data[0])
1748 s->avctx->release_buffer(s->avctx, &s->current_picture);
1751 static av_cold int decode_init(AVCodecContext *avctx)
1753 avctx->pix_fmt= PIX_FMT_YUV420P;
1760 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
1761 const uint8_t *buf = avpkt->data;
1762 int buf_size = avpkt->size;
1763 SnowContext *s = avctx->priv_data;
1764 RangeCoder * const c= &s->c;
1766 AVFrame *picture = data;
1767 int level, orientation, plane_index;
1769 ff_init_range_decoder(c, buf, buf_size);
1770 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
1772 s->current_picture.pict_type= AV_PICTURE_TYPE_I; //FIXME I vs. P
1773 if(decode_header(s)<0)
1775 common_init_after_header(avctx);
1777 // realloc slice buffer for the case that spatial_decomposition_count changed
1778 ff_slice_buffer_destroy(&s->sb);
1779 ff_slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
1781 for(plane_index=0; plane_index<3; plane_index++){
1782 Plane *p= &s->plane[plane_index];
1783 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
1784 && p->hcoeff[1]==-10
1790 if(frame_start(s) < 0)
1792 //keyframe flag duplication mess FIXME
1793 if(avctx->debug&FF_DEBUG_PICT_INFO)
1794 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
1798 for(plane_index=0; plane_index<3; plane_index++){
1799 Plane *p= &s->plane[plane_index];
1803 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
1805 if(s->avctx->debug&2048){
1806 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
1807 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
1811 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
1812 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
1818 for(level=0; level<s->spatial_decomposition_count; level++){
1819 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1820 SubBand *b= &p->band[level][orientation];
1821 unpack_coeffs(s, b, b->parent, orientation);
1827 const int mb_h= s->b_height << s->block_max_depth;
1828 const int block_size = MB_SIZE >> s->block_max_depth;
1829 const int block_w = plane_index ? block_size/2 : block_size;
1831 DWTCompose cs[MAX_DECOMPOSITIONS];
1836 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
1837 for(mb_y=0; mb_y<=mb_h; mb_y++){
1839 int slice_starty = block_w*mb_y;
1840 int slice_h = block_w*(mb_y+1);
1841 if (!(s->keyframe || s->avctx->debug&512)){
1842 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
1843 slice_h -= (block_w >> 1);
1846 for(level=0; level<s->spatial_decomposition_count; level++){
1847 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1848 SubBand *b= &p->band[level][orientation];
1851 int our_mb_start = mb_y;
1852 int our_mb_end = (mb_y + 1);
1854 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
1855 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
1856 if (!(s->keyframe || s->avctx->debug&512)){
1857 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
1858 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
1860 start_y = FFMIN(b->height, start_y);
1861 end_y = FFMIN(b->height, end_y);
1863 if (start_y != end_y){
1864 if (orientation == 0){
1865 SubBand * correlate_band = &p->band[0][0];
1866 int correlate_end_y = FFMIN(b->height, end_y + 1);
1867 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
1868 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
1869 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
1870 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
1873 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
1878 for(; yd<slice_h; yd+=4){
1879 ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
1882 if(s->qlog == LOSSLESS_QLOG){
1883 for(; yq<slice_h && yq<h; yq++){
1884 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
1886 line[x] <<= FRAC_BITS;
1891 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
1893 y = FFMIN(p->height, slice_starty);
1894 end_y = FFMIN(p->height, slice_h);
1896 ff_slice_buffer_release(&s->sb, y++);
1899 ff_slice_buffer_flush(&s->sb);
1906 release_buffer(avctx);
1908 if(!(s->avctx->debug&2048))
1909 *picture= s->current_picture;
1911 *picture= s->mconly_picture;
1913 *data_size = sizeof(AVFrame);
1915 bytes_read= c->bytestream - c->bytestream_start;
1916 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
1921 static av_cold int decode_end(AVCodecContext *avctx)
1923 SnowContext *s = avctx->priv_data;
1925 ff_slice_buffer_destroy(&s->sb);
1932 AVCodec ff_snow_decoder = {
1936 sizeof(SnowContext),
1941 CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
1943 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
1946 #if CONFIG_SNOW_ENCODER
1947 static av_cold int encode_init(AVCodecContext *avctx)
1949 SnowContext *s = avctx->priv_data;
1952 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
1953 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
1954 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
1958 if(avctx->prediction_method == DWT_97
1959 && (avctx->flags & CODEC_FLAG_QSCALE)
1960 && avctx->global_quality == 0){
1961 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
1965 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
1967 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
1968 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
1970 for(plane_index=0; plane_index<3; plane_index++){
1971 s->plane[plane_index].diag_mc= 1;
1972 s->plane[plane_index].htaps= 6;
1973 s->plane[plane_index].hcoeff[0]= 40;
1974 s->plane[plane_index].hcoeff[1]= -10;
1975 s->plane[plane_index].hcoeff[2]= 2;
1976 s->plane[plane_index].fast_mc= 1;
1985 s->m.flags = avctx->flags;
1986 s->m.bit_rate= avctx->bit_rate;
1989 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
1990 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1991 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1992 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
1993 h263_encode_init(&s->m); //mv_penalty
1995 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
1997 if(avctx->flags&CODEC_FLAG_PASS1){
1998 if(!avctx->stats_out)
1999 avctx->stats_out = av_mallocz(256);
2001 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2002 if(ff_rate_control_init(&s->m) < 0)
2005 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2007 avctx->coded_frame= &s->current_picture;
2008 switch(avctx->pix_fmt){
2009 // case PIX_FMT_YUV444P:
2010 // case PIX_FMT_YUV422P:
2011 case PIX_FMT_YUV420P:
2013 // case PIX_FMT_YUV411P:
2014 // case PIX_FMT_YUV410P:
2015 s->colorspace_type= 0;
2017 /* case PIX_FMT_RGB32:
2021 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
2024 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2025 s->chroma_h_shift= 1;
2026 s->chroma_v_shift= 1;
2028 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
2029 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
2031 s->avctx->get_buffer(s->avctx, &s->input_picture);
2033 if(s->avctx->me_method == ME_ITER){
2035 int size= s->b_width * s->b_height << 2*s->block_max_depth;
2036 for(i=0; i<s->max_ref_frames; i++){
2037 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
2038 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
2045 //near copy & paste from dsputil, FIXME
2046 static int pix_sum(uint8_t * pix, int line_size, int w)
2051 for (i = 0; i < w; i++) {
2052 for (j = 0; j < w; j++) {
2056 pix += line_size - w;
2061 //near copy & paste from dsputil, FIXME
2062 static int pix_norm1(uint8_t * pix, int line_size, int w)
2065 uint32_t *sq = ff_squareTbl + 256;
2068 for (i = 0; i < w; i++) {
2069 for (j = 0; j < w; j ++) {
2073 pix += line_size - w;
2081 #define P_TOPRIGHT P[3]
2082 #define P_MEDIAN P[4]
2084 #define FLAG_QPEL 1 //must be 1
2086 static int encode_q_branch(SnowContext *s, int level, int x, int y){
2087 uint8_t p_buffer[1024];
2088 uint8_t i_buffer[1024];
2089 uint8_t p_state[sizeof(s->block_state)];
2090 uint8_t i_state[sizeof(s->block_state)];
2092 uint8_t *pbbak= s->c.bytestream;
2093 uint8_t *pbbak_start= s->c.bytestream_start;
2094 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
2095 const int w= s->b_width << s->block_max_depth;
2096 const int h= s->b_height << s->block_max_depth;
2097 const int rem_depth= s->block_max_depth - level;
2098 const int index= (x + y*w) << rem_depth;
2099 const int block_w= 1<<(LOG2_MB_SIZE - level);
2100 int trx= (x+1)<<rem_depth;
2101 int try= (y+1)<<rem_depth;
2102 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2103 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2104 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2105 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2106 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2107 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2108 int pl = left->color[0];
2109 int pcb= left->color[1];
2110 int pcr= left->color[2];
2114 const int stride= s->current_picture.linesize[0];
2115 const int uvstride= s->current_picture.linesize[1];
2116 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2117 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2118 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2120 int16_t last_mv[3][2];
2121 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2122 const int shift= 1+qpel;
2123 MotionEstContext *c= &s->m.me;
2124 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2125 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2126 int my_context= av_log2(2*FFABS(left->my - top->my));
2127 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2128 int ref, best_ref, ref_score, ref_mx, ref_my;
2130 assert(sizeof(s->block_state) >= 256);
2132 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2136 // clip predictors / edge ?
2138 P_LEFT[0]= left->mx;
2139 P_LEFT[1]= left->my;
2142 P_TOPRIGHT[0]= tr->mx;
2143 P_TOPRIGHT[1]= tr->my;
2145 last_mv[0][0]= s->block[index].mx;
2146 last_mv[0][1]= s->block[index].my;
2147 last_mv[1][0]= right->mx;
2148 last_mv[1][1]= right->my;
2149 last_mv[2][0]= bottom->mx;
2150 last_mv[2][1]= bottom->my;
2157 assert(c-> stride == stride);
2158 assert(c->uvstride == uvstride);
2160 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2161 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2162 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2163 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2165 c->xmin = - x*block_w - 16+3;
2166 c->ymin = - y*block_w - 16+3;
2167 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2168 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2170 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2171 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2172 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2173 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2174 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2175 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2176 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2178 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2179 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2182 c->pred_x= P_LEFT[0];
2183 c->pred_y= P_LEFT[1];
2185 c->pred_x = P_MEDIAN[0];
2186 c->pred_y = P_MEDIAN[1];
2191 for(ref=0; ref<s->ref_frames; ref++){
2192 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2194 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2195 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2197 assert(ref_mx >= c->xmin);
2198 assert(ref_mx <= c->xmax);
2199 assert(ref_my >= c->ymin);
2200 assert(ref_my <= c->ymax);
2202 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2203 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2204 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2205 if(s->ref_mvs[ref]){
2206 s->ref_mvs[ref][index][0]= ref_mx;
2207 s->ref_mvs[ref][index][1]= ref_my;
2208 s->ref_scores[ref][index]= ref_score;
2210 if(score > ref_score){
2217 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
2220 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
2222 pc.bytestream_start=
2223 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2224 memcpy(p_state, s->block_state, sizeof(s->block_state));
2226 if(level!=s->block_max_depth)
2227 put_rac(&pc, &p_state[4 + s_context], 1);
2228 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2229 if(s->ref_frames > 1)
2230 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2231 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2232 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2233 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2234 p_len= pc.bytestream - pc.bytestream_start;
2235 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
2237 block_s= block_w*block_w;
2238 sum = pix_sum(current_data[0], stride, block_w);
2239 l= (sum + block_s/2)/block_s;
2240 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2242 block_s= block_w*block_w>>2;
2243 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2244 cb= (sum + block_s/2)/block_s;
2245 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2246 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2247 cr= (sum + block_s/2)/block_s;
2248 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2251 ic.bytestream_start=
2252 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2253 memcpy(i_state, s->block_state, sizeof(s->block_state));
2254 if(level!=s->block_max_depth)
2255 put_rac(&ic, &i_state[4 + s_context], 1);
2256 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2257 put_symbol(&ic, &i_state[32], l-pl , 1);
2258 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2259 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2260 i_len= ic.bytestream - ic.bytestream_start;
2261 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
2263 // assert(score==256*256*256*64-1);
2264 assert(iscore < 255*255*256 + s->lambda2*10);
2265 assert(iscore >= 0);
2266 assert(l>=0 && l<=255);
2267 assert(pl>=0 && pl<=255);
2270 int varc= iscore >> 8;
2271 int vard= score >> 8;
2272 if (vard <= 64 || vard < varc)
2273 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2275 c->scene_change_score+= s->m.qscale;
2278 if(level!=s->block_max_depth){
2279 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2280 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2281 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2282 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2283 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2284 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2286 if(score2 < score && score2 < iscore)
2291 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2292 memcpy(pbbak, i_buffer, i_len);
2294 s->c.bytestream_start= pbbak_start;
2295 s->c.bytestream= pbbak + i_len;
2296 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2297 memcpy(s->block_state, i_state, sizeof(s->block_state));
2300 memcpy(pbbak, p_buffer, p_len);
2302 s->c.bytestream_start= pbbak_start;
2303 s->c.bytestream= pbbak + p_len;
2304 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2305 memcpy(s->block_state, p_state, sizeof(s->block_state));
2310 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2311 const int w= s->b_width << s->block_max_depth;
2312 const int rem_depth= s->block_max_depth - level;
2313 const int index= (x + y*w) << rem_depth;
2314 int trx= (x+1)<<rem_depth;
2315 BlockNode *b= &s->block[index];
2316 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2317 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2318 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2319 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2320 int pl = left->color[0];
2321 int pcb= left->color[1];
2322 int pcr= left->color[2];
2324 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2325 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2326 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2327 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2330 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2334 if(level!=s->block_max_depth){
2335 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2336 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2338 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2339 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2340 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2341 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2342 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2346 if(b->type & BLOCK_INTRA){
2347 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2348 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2349 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2350 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2351 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2352 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2354 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2355 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2356 if(s->ref_frames > 1)
2357 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2358 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2359 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2360 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2364 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2366 Plane *p= &s->plane[plane_index];
2367 const int block_size = MB_SIZE >> s->block_max_depth;
2368 const int block_w = plane_index ? block_size/2 : block_size;
2369 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2370 const int obmc_stride= plane_index ? block_size : 2*block_size;
2371 const int ref_stride= s->current_picture.linesize[plane_index];
2372 uint8_t *src= s-> input_picture.data[plane_index];
2373 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2374 const int b_stride = s->b_width << s->block_max_depth;
2375 const int w= p->width;
2376 const int h= p->height;
2377 int index= mb_x + mb_y*b_stride;
2378 BlockNode *b= &s->block[index];
2379 BlockNode backup= *b;
2383 b->type|= BLOCK_INTRA;
2384 b->color[plane_index]= 0;
2385 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2388 int mb_x2= mb_x + (i &1) - 1;
2389 int mb_y2= mb_y + (i>>1) - 1;
2390 int x= block_w*mb_x2 + block_w/2;
2391 int y= block_w*mb_y2 + block_w/2;
2393 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2394 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2396 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2397 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2398 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2399 int obmc_v= obmc[index];
2401 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2402 if(x<0) obmc_v += obmc[index + block_w];
2403 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2404 if(x+block_w>w) obmc_v += obmc[index - block_w];
2405 //FIXME precalculate this or simplify it somehow else
2407 d = -dst[index] + (1<<(FRAC_BITS-1));
2409 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2410 aa += obmc_v * obmc_v; //FIXME precalculate this
2416 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2419 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2420 const int b_stride = s->b_width << s->block_max_depth;
2421 const int b_height = s->b_height<< s->block_max_depth;
2422 int index= x + y*b_stride;
2423 const BlockNode *b = &s->block[index];
2424 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2425 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2426 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2427 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2429 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2430 // int my_context= av_log2(2*FFABS(left->my - top->my));
2432 if(x<0 || x>=b_stride || y>=b_height)
2439 00001XXXX 15-30 8-15
2441 //FIXME try accurate rate
2442 //FIXME intra and inter predictors if surrounding blocks are not the same type
2443 if(b->type & BLOCK_INTRA){
2444 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2445 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2446 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2448 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2451 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2452 + av_log2(2*FFABS(dmy))
2453 + av_log2(2*b->ref));
2457 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2458 Plane *p= &s->plane[plane_index];
2459 const int block_size = MB_SIZE >> s->block_max_depth;
2460 const int block_w = plane_index ? block_size/2 : block_size;
2461 const int obmc_stride= plane_index ? block_size : 2*block_size;
2462 const int ref_stride= s->current_picture.linesize[plane_index];
2463 uint8_t *dst= s->current_picture.data[plane_index];
2464 uint8_t *src= s-> input_picture.data[plane_index];
2465 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2466 uint8_t *cur = s->scratchbuf;
2467 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2468 const int b_stride = s->b_width << s->block_max_depth;
2469 const int b_height = s->b_height<< s->block_max_depth;
2470 const int w= p->width;
2471 const int h= p->height;
2474 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2475 int sx= block_w*mb_x - block_w/2;
2476 int sy= block_w*mb_y - block_w/2;
2477 int x0= FFMAX(0,-sx);
2478 int y0= FFMAX(0,-sy);
2479 int x1= FFMIN(block_w*2, w-sx);
2480 int y1= FFMIN(block_w*2, h-sy);
2483 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2485 for(y=y0; y<y1; y++){
2486 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2487 const IDWTELEM *pred1 = pred + y*obmc_stride;
2488 uint8_t *cur1 = cur + y*ref_stride;
2489 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2490 for(x=x0; x<x1; x++){
2491 #if FRAC_BITS >= LOG2_OBMC_MAX
2492 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2494 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2496 v = (v + pred1[x]) >> FRAC_BITS;
2497 if(v&(~255)) v= ~(v>>31);
2502 /* copy the regions where obmc[] = (uint8_t)256 */
2503 if(LOG2_OBMC_MAX == 8
2504 && (mb_x == 0 || mb_x == b_stride-1)
2505 && (mb_y == 0 || mb_y == b_height-1)){
2514 for(y=y0; y<y1; y++)
2515 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2519 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2520 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2521 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2522 * So improving the score of one block is not strictly guaranteed
2523 * to improve the score of the whole frame, thus iterative motion
2524 * estimation does not always converge. */
2525 if(s->avctx->me_cmp == FF_CMP_W97)
2526 distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2527 else if(s->avctx->me_cmp == FF_CMP_W53)
2528 distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2532 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2533 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2538 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2547 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2549 if(mb_x == b_stride-2)
2550 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2552 return distortion + rate*penalty_factor;
2555 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2557 Plane *p= &s->plane[plane_index];
2558 const int block_size = MB_SIZE >> s->block_max_depth;
2559 const int block_w = plane_index ? block_size/2 : block_size;
2560 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2561 const int obmc_stride= plane_index ? block_size : 2*block_size;
2562 const int ref_stride= s->current_picture.linesize[plane_index];
2563 uint8_t *dst= s->current_picture.data[plane_index];
2564 uint8_t *src= s-> input_picture.data[plane_index];
2565 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2566 // const has only been removed from zero_dst to suppress a warning
2567 static IDWTELEM zero_dst[4096]; //FIXME
2568 const int b_stride = s->b_width << s->block_max_depth;
2569 const int w= p->width;
2570 const int h= p->height;
2573 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2576 int mb_x2= mb_x + (i%3) - 1;
2577 int mb_y2= mb_y + (i/3) - 1;
2578 int x= block_w*mb_x2 + block_w/2;
2579 int y= block_w*mb_y2 + block_w/2;
2581 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2582 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2584 //FIXME find a cleaner/simpler way to skip the outside stuff
2585 for(y2= y; y2<0; y2++)
2586 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2587 for(y2= h; y2<y+block_w; y2++)
2588 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2590 for(y2= y; y2<y+block_w; y2++)
2591 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2594 for(y2= y; y2<y+block_w; y2++)
2595 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2598 assert(block_w== 8 || block_w==16);
2599 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2603 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2604 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2612 rate = get_block_bits(s, mb_x, mb_y, 2);
2613 for(i=merged?4:0; i<9; i++){
2614 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2615 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2618 return distortion + rate*penalty_factor;
2621 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2622 const int w= b->width;
2623 const int h= b->height;
2635 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2636 v= src[x + y*stride];
2639 t= src[x + (y-1)*stride];
2641 lt= src[x - 1 + (y-1)*stride];
2644 rt= src[x + 1 + (y-1)*stride];
2648 l= src[x - 1 + y*stride];
2650 if(orientation==1) ll= src[y + (x-2)*stride];
2651 else ll= src[x - 2 + y*stride];
2657 if(px<b->parent->width && py<b->parent->height)
2658 p= parent[px + py*2*stride];
2660 if(!(/*ll|*/l|lt|t|rt|p)){
2662 runs[run_index++]= run;
2670 max_index= run_index;
2671 runs[run_index++]= run;
2673 run= runs[run_index++];
2675 put_symbol2(&s->c, b->state[30], max_index, 0);
2676 if(run_index <= max_index)
2677 put_symbol2(&s->c, b->state[1], run, 3);
2680 if(s->c.bytestream_end - s->c.bytestream < w*40){
2681 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2686 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2687 v= src[x + y*stride];
2690 t= src[x + (y-1)*stride];
2692 lt= src[x - 1 + (y-1)*stride];
2695 rt= src[x + 1 + (y-1)*stride];
2699 l= src[x - 1 + y*stride];
2701 if(orientation==1) ll= src[y + (x-2)*stride];
2702 else ll= src[x - 2 + y*stride];
2708 if(px<b->parent->width && py<b->parent->height)
2709 p= parent[px + py*2*stride];
2711 if(/*ll|*/l|lt|t|rt|p){
2712 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
2714 put_rac(&s->c, &b->state[0][context], !!v);
2717 run= runs[run_index++];
2719 if(run_index <= max_index)
2720 put_symbol2(&s->c, b->state[1], run, 3);
2728 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
2729 int l2= 2*FFABS(l) + (l<0);
2730 int t2= 2*FFABS(t) + (t<0);
2732 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
2733 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
2741 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2742 // encode_subband_qtree(s, b, src, parent, stride, orientation);
2743 // encode_subband_z0run(s, b, src, parent, stride, orientation);
2744 return encode_subband_c0run(s, b, src, parent, stride, orientation);
2745 // encode_subband_dzr(s, b, src, parent, stride, orientation);
2748 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2749 const int b_stride= s->b_width << s->block_max_depth;
2750 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2751 BlockNode backup= *block;
2752 int rd, index, value;
2754 assert(mb_x>=0 && mb_y>=0);
2755 assert(mb_x<b_stride);
2758 block->color[0] = p[0];
2759 block->color[1] = p[1];
2760 block->color[2] = p[2];
2761 block->type |= BLOCK_INTRA;
2763 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2764 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2765 if(s->me_cache[index] == value)
2767 s->me_cache[index]= value;
2771 block->type &= ~BLOCK_INTRA;
2774 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2786 /* special case for int[2] args we discard afterwards,
2787 * fixes compilation problem with gcc 2.95 */
2788 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2789 int p[2] = {p0, p1};
2790 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2793 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2794 const int b_stride= s->b_width << s->block_max_depth;
2795 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2796 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2797 int rd, index, value;
2799 assert(mb_x>=0 && mb_y>=0);
2800 assert(mb_x<b_stride);
2801 assert(((mb_x|mb_y)&1) == 0);
2803 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
2804 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
2805 if(s->me_cache[index] == value)
2807 s->me_cache[index]= value;
2812 block->type &= ~BLOCK_INTRA;
2813 block[1]= block[b_stride]= block[b_stride+1]= *block;
2815 rd= get_4block_rd(s, mb_x, mb_y, 0);
2822 block[0]= backup[0];
2823 block[1]= backup[1];
2824 block[b_stride]= backup[2];
2825 block[b_stride+1]= backup[3];
2830 static void iterative_me(SnowContext *s){
2831 int pass, mb_x, mb_y;
2832 const int b_width = s->b_width << s->block_max_depth;
2833 const int b_height= s->b_height << s->block_max_depth;
2834 const int b_stride= b_width;
2838 RangeCoder r = s->c;
2839 uint8_t state[sizeof(s->block_state)];
2840 memcpy(state, s->block_state, sizeof(s->block_state));
2841 for(mb_y= 0; mb_y<s->b_height; mb_y++)
2842 for(mb_x= 0; mb_x<s->b_width; mb_x++)
2843 encode_q_branch(s, 0, mb_x, mb_y);
2845 memcpy(s->block_state, state, sizeof(s->block_state));
2848 for(pass=0; pass<25; pass++){
2851 for(mb_y= 0; mb_y<b_height; mb_y++){
2852 for(mb_x= 0; mb_x<b_width; mb_x++){
2853 int dia_change, i, j, ref;
2854 int best_rd= INT_MAX, ref_rd;
2855 BlockNode backup, ref_b;
2856 const int index= mb_x + mb_y * b_stride;
2857 BlockNode *block= &s->block[index];
2858 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
2859 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
2860 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
2861 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
2862 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
2863 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
2864 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
2865 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
2866 const int b_w= (MB_SIZE >> s->block_max_depth);
2867 uint8_t obmc_edged[b_w*2][b_w*2];
2869 if(pass && (block->type & BLOCK_OPT))
2871 block->type |= BLOCK_OPT;
2875 if(!s->me_cache_generation)
2876 memset(s->me_cache, 0, sizeof(s->me_cache));
2877 s->me_cache_generation += 1<<22;
2879 //FIXME precalculate
2882 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
2884 for(y=0; y<b_w*2; y++)
2885 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
2886 if(mb_x==b_stride-1)
2887 for(y=0; y<b_w*2; y++)
2888 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
2890 for(x=0; x<b_w*2; x++)
2891 obmc_edged[0][x] += obmc_edged[b_w-1][x];
2892 for(y=1; y<b_w; y++)
2893 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
2895 if(mb_y==b_height-1){
2896 for(x=0; x<b_w*2; x++)
2897 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
2898 for(y=b_w; y<b_w*2-1; y++)
2899 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
2903 //skip stuff outside the picture
2904 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
2905 uint8_t *src= s-> input_picture.data[0];
2906 uint8_t *dst= s->current_picture.data[0];
2907 const int stride= s->current_picture.linesize[0];
2908 const int block_w= MB_SIZE >> s->block_max_depth;
2909 const int sx= block_w*mb_x - block_w/2;
2910 const int sy= block_w*mb_y - block_w/2;
2911 const int w= s->plane[0].width;
2912 const int h= s->plane[0].height;
2916 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
2917 for(y=h; y<sy+block_w*2; y++)
2918 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
2920 for(y=sy; y<sy+block_w*2; y++)
2921 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
2923 if(sx+block_w*2 > w){
2924 for(y=sy; y<sy+block_w*2; y++)
2925 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
2929 // intra(black) = neighbors' contribution to the current block
2931 color[i]= get_dc(s, mb_x, mb_y, i);
2933 // get previous score (cannot be cached due to OBMC)
2934 if(pass > 0 && (block->type&BLOCK_INTRA)){
2935 int color0[3]= {block->color[0], block->color[1], block->color[2]};
2936 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
2938 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
2942 for(ref=0; ref < s->ref_frames; ref++){
2943 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
2944 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
2949 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
2950 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
2952 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
2954 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
2956 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
2958 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
2961 //FIXME avoid subpel interpolation / round to nearest integer
2964 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
2966 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
2967 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
2968 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
2969 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
2975 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
2978 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
2980 //FIXME or try the standard 2 pass qpel or similar
2982 mvr[0][0]= block->mx;
2983 mvr[0][1]= block->my;
2984 if(ref_rd > best_rd){
2991 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
2992 //FIXME RD style color selection
2993 if(!same_block(block, &backup)){
2994 if(tb ) tb ->type &= ~BLOCK_OPT;
2995 if(lb ) lb ->type &= ~BLOCK_OPT;
2996 if(rb ) rb ->type &= ~BLOCK_OPT;
2997 if(bb ) bb ->type &= ~BLOCK_OPT;
2998 if(tlb) tlb->type &= ~BLOCK_OPT;
2999 if(trb) trb->type &= ~BLOCK_OPT;
3000 if(blb) blb->type &= ~BLOCK_OPT;
3001 if(brb) brb->type &= ~BLOCK_OPT;
3006 av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3011 if(s->block_max_depth == 1){
3013 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3014 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3016 int best_rd, init_rd;
3017 const int index= mb_x + mb_y * b_stride;
3020 b[0]= &s->block[index];
3022 b[2]= b[0]+b_stride;
3024 if(same_block(b[0], b[1]) &&
3025 same_block(b[0], b[2]) &&
3026 same_block(b[0], b[3]))
3029 if(!s->me_cache_generation)
3030 memset(s->me_cache, 0, sizeof(s->me_cache));
3031 s->me_cache_generation += 1<<22;
3033 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3035 //FIXME more multiref search?
3036 check_4block_inter(s, mb_x, mb_y,
3037 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3038 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3041 if(!(b[i]->type&BLOCK_INTRA))
3042 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3044 if(init_rd != best_rd)
3048 av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3052 static void encode_blocks(SnowContext *s, int search){
3057 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
3061 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
3062 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
3066 if(s->avctx->me_method == ME_ITER || !search)
3067 encode_q_branch2(s, 0, x, y);
3069 encode_q_branch (s, 0, x, y);
3074 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3075 const int w= b->width;
3076 const int h= b->height;
3077 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3078 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3079 int x,y, thres1, thres2;
3081 if(s->qlog == LOSSLESS_QLOG){
3084 dst[x + y*stride]= src[x + y*stride];
3088 bias= bias ? 0 : (3*qmul)>>3;
3089 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3095 int i= src[x + y*stride];
3097 if((unsigned)(i+thres1) > thres2){
3100 i/= qmul; //FIXME optimize
3101 dst[x + y*stride]= i;
3105 i/= qmul; //FIXME optimize
3106 dst[x + y*stride]= -i;
3109 dst[x + y*stride]= 0;
3115 int i= src[x + y*stride];
3117 if((unsigned)(i+thres1) > thres2){
3120 i= (i + bias) / qmul; //FIXME optimize
3121 dst[x + y*stride]= i;
3125 i= (i + bias) / qmul; //FIXME optimize
3126 dst[x + y*stride]= -i;
3129 dst[x + y*stride]= 0;
3135 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3136 const int w= b->width;
3137 const int h= b->height;
3138 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3139 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3140 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3143 if(s->qlog == LOSSLESS_QLOG) return;
3147 int i= src[x + y*stride];
3149 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3151 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3157 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3158 const int w= b->width;
3159 const int h= b->height;
3162 for(y=h-1; y>=0; y--){
3163 for(x=w-1; x>=0; x--){
3164 int i= x + y*stride;
3168 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3169 else src[i] -= src[i - 1];
3171 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3172 else src[i] -= src[i - 1];
3175 if(y) src[i] -= src[i - stride];
3181 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3182 const int w= b->width;
3183 const int h= b->height;
3188 int i= x + y*stride;
3192 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3193 else src[i] += src[i - 1];
3195 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3196 else src[i] += src[i - 1];
3199 if(y) src[i] += src[i - stride];
3205 static void encode_qlogs(SnowContext *s){
3206 int plane_index, level, orientation;
3208 for(plane_index=0; plane_index<2; plane_index++){
3209 for(level=0; level<s->spatial_decomposition_count; level++){
3210 for(orientation=level ? 1:0; orientation<4; orientation++){
3211 if(orientation==2) continue;
3212 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3218 static void encode_header(SnowContext *s){
3222 memset(kstate, MID_STATE, sizeof(kstate));
3224 put_rac(&s->c, kstate, s->keyframe);
3225 if(s->keyframe || s->always_reset){
3227 s->last_spatial_decomposition_type=
3231 s->last_block_max_depth= 0;
3232 for(plane_index=0; plane_index<2; plane_index++){
3233 Plane *p= &s->plane[plane_index];
3236 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3240 put_symbol(&s->c, s->header_state, s->version, 0);
3241 put_rac(&s->c, s->header_state, s->always_reset);
3242 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3243 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3244 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3245 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3246 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3247 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3248 put_rac(&s->c, s->header_state, s->spatial_scalability);
3249 // put_rac(&s->c, s->header_state, s->rate_scalability);
3250 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3257 for(plane_index=0; plane_index<2; plane_index++){
3258 Plane *p= &s->plane[plane_index];
3259 update_mc |= p->last_htaps != p->htaps;
3260 update_mc |= p->last_diag_mc != p->diag_mc;
3261 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3263 put_rac(&s->c, s->header_state, update_mc);
3265 for(plane_index=0; plane_index<2; plane_index++){
3266 Plane *p= &s->plane[plane_index];
3267 put_rac(&s->c, s->header_state, p->diag_mc);
3268 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3269 for(i= p->htaps/2; i; i--)
3270 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3273 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3274 put_rac(&s->c, s->header_state, 1);
3275 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3278 put_rac(&s->c, s->header_state, 0);
3281 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3282 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3283 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3284 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3285 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3289 static void update_last_header_values(SnowContext *s){
3293 for(plane_index=0; plane_index<2; plane_index++){
3294 Plane *p= &s->plane[plane_index];
3295 p->last_diag_mc= p->diag_mc;
3296 p->last_htaps = p->htaps;
3297 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3301 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
3302 s->last_qlog = s->qlog;
3303 s->last_qbias = s->qbias;
3304 s->last_mv_scale = s->mv_scale;
3305 s->last_block_max_depth = s->block_max_depth;
3306 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3309 static int qscale2qlog(int qscale){
3310 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3311 + 61*QROOT/8; //<64 >60
3314 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3316 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3317 * FIXME we know exact mv bits at this point,
3318 * but ratecontrol isn't set up to include them. */
3319 uint32_t coef_sum= 0;
3320 int level, orientation, delta_qlog;
3322 for(level=0; level<s->spatial_decomposition_count; level++){
3323 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3324 SubBand *b= &s->plane[0].band[level][orientation];
3325 IDWTELEM *buf= b->ibuf;
3326 const int w= b->width;
3327 const int h= b->height;
3328 const int stride= b->stride;
3329 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3330 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3331 const int qdiv= (1<<16)/qmul;
3333 //FIXME this is ugly
3336 buf[x+y*stride]= b->buf[x+y*stride];
3338 decorrelate(s, b, buf, stride, 1, 0);
3341 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3345 /* ugly, ratecontrol just takes a sqrt again */
3346 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3347 assert(coef_sum < INT_MAX);
3349 if(pict->pict_type == AV_PICTURE_TYPE_I){
3350 s->m.current_picture.mb_var_sum= coef_sum;
3351 s->m.current_picture.mc_mb_var_sum= 0;
3353 s->m.current_picture.mc_mb_var_sum= coef_sum;
3354 s->m.current_picture.mb_var_sum= 0;
3357 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3358 if (pict->quality < 0)
3360 s->lambda= pict->quality * 3/2;
3361 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3362 s->qlog+= delta_qlog;
3366 static void calculate_visual_weight(SnowContext *s, Plane *p){
3367 int width = p->width;
3368 int height= p->height;
3369 int level, orientation, x, y;
3371 for(level=0; level<s->spatial_decomposition_count; level++){
3372 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3373 SubBand *b= &p->band[level][orientation];
3374 IDWTELEM *ibuf= b->ibuf;
3377 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3378 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3379 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3380 for(y=0; y<height; y++){
3381 for(x=0; x<width; x++){
3382 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3387 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3392 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3393 SnowContext *s = avctx->priv_data;
3394 RangeCoder * const c= &s->c;
3395 AVFrame *pict = data;
3396 const int width= s->avctx->width;
3397 const int height= s->avctx->height;
3398 int level, orientation, plane_index, i, y;
3399 uint8_t rc_header_bak[sizeof(s->header_state)];
3400 uint8_t rc_block_bak[sizeof(s->block_state)];
3402 ff_init_range_encoder(c, buf, buf_size);
3403 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3407 for(y=0; y<(height>>shift); y++)
3408 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3409 &pict->data[i][y * pict->linesize[i]],
3412 s->new_picture = *pict;
3414 s->m.picture_number= avctx->frame_number;
3415 if(avctx->flags&CODEC_FLAG_PASS2){
3417 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3418 s->keyframe= pict->pict_type==AV_PICTURE_TYPE_I;
3419 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
3420 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3421 if (pict->quality < 0)
3425 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3427 pict->pict_type= s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3430 if(s->pass1_rc && avctx->frame_number == 0)
3431 pict->quality= 2*FF_QP2LAMBDA;
3433 s->qlog= qscale2qlog(pict->quality);
3434 s->lambda = pict->quality * 3/2;
3436 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
3437 s->qlog= LOSSLESS_QLOG;
3439 }//else keep previous frame's qlog until after motion estimation
3443 s->m.current_picture_ptr= &s->m.current_picture;
3444 s->m.last_picture.pts= s->m.current_picture.pts;
3445 s->m.current_picture.pts= pict->pts;
3446 if(pict->pict_type == AV_PICTURE_TYPE_P){
3447 int block_width = (width +15)>>4;
3448 int block_height= (height+15)>>4;
3449 int stride= s->current_picture.linesize[0];
3451 assert(s->current_picture.data[0]);
3452 assert(s->last_picture[0].data[0]);
3454 s->m.avctx= s->avctx;
3455 s->m.current_picture.data[0]= s->current_picture.data[0];
3456 s->m. last_picture.data[0]= s->last_picture[0].data[0];
3457 s->m. new_picture.data[0]= s-> input_picture.data[0];
3458 s->m. last_picture_ptr= &s->m. last_picture;
3460 s->m. last_picture.linesize[0]=
3461 s->m. new_picture.linesize[0]=
3462 s->m.current_picture.linesize[0]= stride;
3463 s->m.uvlinesize= s->current_picture.linesize[1];
3465 s->m.height= height;
3466 s->m.mb_width = block_width;
3467 s->m.mb_height= block_height;
3468 s->m.mb_stride= s->m.mb_width+1;
3469 s->m.b8_stride= 2*s->m.mb_width+1;
3471 s->m.pict_type= pict->pict_type;
3472 s->m.me_method= s->avctx->me_method;
3473 s->m.me.scene_change_score=0;
3474 s->m.flags= s->avctx->flags;
3475 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3476 s->m.out_format= FMT_H263;
3477 s->m.unrestricted_mv= 1;
3479 s->m.lambda = s->lambda;
3480 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3481 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3483 s->m.dsp= s->dsp; //move
3489 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
3490 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
3495 if(pict->pict_type == AV_PICTURE_TYPE_I)
3496 s->spatial_decomposition_count= 5;
3498 s->spatial_decomposition_count= 5;
3500 s->m.pict_type = pict->pict_type;
3501 s->qbias= pict->pict_type == AV_PICTURE_TYPE_P ? 2 : 0;
3503 common_init_after_header(avctx);
3505 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3506 for(plane_index=0; plane_index<3; plane_index++){
3507 calculate_visual_weight(s, &s->plane[plane_index]);
3512 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3513 encode_blocks(s, 1);
3514 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3516 for(plane_index=0; plane_index<3; plane_index++){
3517 Plane *p= &s->plane[plane_index];
3521 // int bits= put_bits_count(&s->c.pb);
3523 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
3525 if(pict->data[plane_index]) //FIXME gray hack
3528 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3531 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
3534 && pict->pict_type == AV_PICTURE_TYPE_P
3535 && !(avctx->flags&CODEC_FLAG_PASS2)
3536 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3537 ff_init_range_encoder(c, buf, buf_size);
3538 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3539 pict->pict_type= AV_PICTURE_TYPE_I;
3541 s->current_picture.key_frame=1;
3545 if(s->qlog == LOSSLESS_QLOG){
3548 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3554 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
3560 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
3562 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3564 if(s->pass1_rc && plane_index==0){
3565 int delta_qlog = ratecontrol_1pass(s, pict);
3566 if (delta_qlog <= INT_MIN)
3569 //reordering qlog in the bitstream would eliminate this reset
3570 ff_init_range_encoder(c, buf, buf_size);
3571 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
3572 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
3574 encode_blocks(s, 0);
3578 for(level=0; level<s->spatial_decomposition_count; level++){
3579 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3580 SubBand *b= &p->band[level][orientation];
3583 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
3585 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == AV_PICTURE_TYPE_P, 0);
3586 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
3587 assert(b->parent==NULL || b->parent->stride == b->stride*2);
3589 correlate(s, b, b->ibuf, b->stride, 1, 0);
3593 for(level=0; level<s->spatial_decomposition_count; level++){
3594 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3595 SubBand *b= &p->band[level][orientation];
3597 dequantize(s, b, b->ibuf, b->stride);
3601 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3602 if(s->qlog == LOSSLESS_QLOG){
3605 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
3609 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3612 if(pict->pict_type == AV_PICTURE_TYPE_I){
3615 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
3616 pict->data[plane_index][y*pict->linesize[plane_index] + x];
3620 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
3621 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3624 if(s->avctx->flags&CODEC_FLAG_PSNR){
3627 if(pict->data[plane_index]) //FIXME gray hack
3630 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3634 s->avctx->error[plane_index] += error;
3635 s->current_picture.error[plane_index] = error;
3640 update_last_header_values(s);
3642 release_buffer(avctx);
3644 s->current_picture.coded_picture_number = avctx->frame_number;
3645 s->current_picture.pict_type = pict->pict_type;
3646 s->current_picture.quality = pict->quality;
3647 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3648 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
3649 s->m.current_picture.display_picture_number =
3650 s->m.current_picture.coded_picture_number = avctx->frame_number;
3651 s->m.current_picture.quality = pict->quality;
3652 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3654 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
3656 if(avctx->flags&CODEC_FLAG_PASS1)
3657 ff_write_pass1_stats(&s->m);
3658 s->m.last_pict_type = s->m.pict_type;
3659 avctx->frame_bits = s->m.frame_bits;
3660 avctx->mv_bits = s->m.mv_bits;
3661 avctx->misc_bits = s->m.misc_bits;
3662 avctx->p_tex_bits = s->m.p_tex_bits;
3666 return ff_rac_terminate(c);
3669 static av_cold int encode_end(AVCodecContext *avctx)
3671 SnowContext *s = avctx->priv_data;
3674 if (s->input_picture.data[0])
3675 avctx->release_buffer(avctx, &s->input_picture);
3676 av_free(avctx->stats_out);
3681 AVCodec ff_snow_encoder = {
3685 sizeof(SnowContext),
3689 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
3699 #include "libavutil/lfg.h"
3704 int buffer[2][width*height];
3708 s.spatial_decomposition_count=6;
3709 s.spatial_decomposition_type=1;
3711 av_lfg_init(&prng, 1);
3713 printf("testing 5/3 DWT\n");
3714 for(i=0; i<width*height; i++)
3715 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
3717 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3718 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3720 for(i=0; i<width*height; i++)
3721 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
3723 printf("testing 9/7 DWT\n");
3724 s.spatial_decomposition_type=0;
3725 for(i=0; i<width*height; i++)
3726 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
3728 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3729 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3731 for(i=0; i<width*height; i++)
3732 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
3735 int level, orientation, x, y;
3736 int64_t errors[8][4];
3739 memset(errors, 0, sizeof(errors));
3740 s.spatial_decomposition_count=3;
3741 s.spatial_decomposition_type=0;
3742 for(level=0; level<s.spatial_decomposition_count; level++){
3743 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3744 int w= width >> (s.spatial_decomposition_count-level);
3745 int h= height >> (s.spatial_decomposition_count-level);
3746 int stride= width << (s.spatial_decomposition_count-level);
3747 DWTELEM *buf= buffer[0];
3750 if(orientation&1) buf+=w;
3751 if(orientation>1) buf+=stride>>1;
3753 memset(buffer[0], 0, sizeof(int)*width*height);
3754 buf[w/2 + h/2*stride]= 256*256;
3755 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3756 for(y=0; y<height; y++){
3757 for(x=0; x<width; x++){
3758 int64_t d= buffer[0][x + y*width];
3760 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
3762 if(FFABS(height/2-y)<9 && level==2) printf("\n");
3764 error= (int)(sqrt(error)+0.5);
3765 errors[level][orientation]= error;
3766 if(g) g=av_gcd(g, error);
3770 printf("static int const visual_weight[][4]={\n");
3771 for(level=0; level<s.spatial_decomposition_count; level++){
3773 for(orientation=0; orientation<4; orientation++){
3774 printf("%8"PRId64",", errors[level][orientation]/g);
3781 int w= width >> (s.spatial_decomposition_count-level);
3782 //int h= height >> (s.spatial_decomposition_count-level);
3783 int stride= width << (s.spatial_decomposition_count-level);
3784 DWTELEM *buf= buffer[0];
3790 memset(buffer[0], 0, sizeof(int)*width*height);
3791 for(y=0; y<height; y++){
3792 for(x=0; x<width; x++){
3793 int tab[4]={0,2,3,1};
3794 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3797 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3798 for(y=0; y<height; y++){
3799 for(x=0; x<width; x++){
3800 int64_t d= buffer[0][x + y*width];
3802 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
3804 if(FFABS(height/2-y)<9) printf("\n");