2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/intmath.h"
27 #include "rangecoder.h"
30 #include "mpegvideo.h"
/**
 * 256-entry lookup indexed with the low 8 bits of a packed coefficient
 * (see the quant3bA[l&0xFF] and quant3bA[t&0xFF] uses in unpack_coeffs).
 * Indices 0 and 1 give 0; every other even index gives +1 and every other
 * odd index gives -1.
 */
36 static const int8_t quant3bA[256]={
37 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
38 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
39 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
40 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
41 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
42 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
43 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
44 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
45 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
46 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
47 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
48 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
49 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
50 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
51 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
52 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/**
 * 32x32 weight window (1024 entries), entry 0 of obmc_tab.
 * The rows are mirror-symmetric left/right and top/bottom; weights grow from
 * 0 in the corners to 240 in the centre. add_yblock reads the four quadrants
 * of such a window to weight four block predictions.
 */
55 static const uint8_t obmc32[1024]={
56 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
57 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
58 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
59 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
60 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
61 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
62 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
63 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
64 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
65 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
66 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
67 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
68 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
69 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
70 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
71 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
72 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
73 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
74 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
75 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
76 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
77 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
78 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
79 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
80 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
81 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
82 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
83 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
84 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
85 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
86 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
87 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
/**
 * 16x16 weight window (256 entries), entry 1 of obmc_tab.
 * Mirror-symmetric in both directions; weights range from 0 in the corners
 * to 224 in the centre.
 */
90 static const uint8_t obmc16[256]={
91 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
92 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
93 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
94 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
95 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
96 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
97 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
98 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
99 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
100 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
101 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
102 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
103 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
104 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
105 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
106 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
/**
 * 8x8 weight window (64 entries), entry 2 of obmc_tab.
 * Mirror-symmetric in both directions; weights range from 4 in the corners
 * to 196 in the centre.
 */
111 static const uint8_t obmc8[64]={
112 4, 12, 20, 28, 28, 20, 12, 4,
113 12, 36, 60, 84, 84, 60, 36, 12,
114 20, 60,100,140,140,100, 60, 20,
115 28, 84,140,196,196,140, 84, 28,
116 28, 84,140,196,196,140, 84, 28,
117 20, 60,100,140,140,100, 60, 20,
118 12, 36, 60, 84, 84, 60, 36, 12,
119 4, 12, 20, 28, 28, 20, 12, 4,
/** 16-entry weight window, entry 3 (the smallest) of obmc_tab. */
124 static const uint8_t obmc4[16]={
/**
 * Weight windows ordered from largest to smallest. predict_slice and
 * predict_slice_buffered index this table with s->block_max_depth for
 * plane 0 and s->block_max_depth+1 for the other planes.
 */
132 static const uint8_t * const obmc_tab[4]={
133 obmc32, obmc16, obmc8, obmc4
/**
 * Motion vector scale factors read by pred_mv as scale_mv_ref[ref][neighbour_ref].
 * pred_mv multiplies a neighbour's vector component by the factor, adds 128
 * and shifts right by 8, i.e. the factors are used as 8-bit fixed point.
 */
136 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
/**
 * Per-block prediction parameters. Code in this file reads and writes the
 * members color[], mx, my, ref, type and level of a BlockNode.
 */
138 typedef struct BlockNode{
144 //#define TYPE_SPLIT 1
// Flag bit in BlockNode.type; pred_block fills such a block with
// block->color[plane_index] instead of fetching reference pixels.
145 #define BLOCK_INTRA 1
147 //#define TYPE_NOCOLOR 4
// level of the left/top/top-left/top-right neighbours is summed into the
// split context (s_context) in decode_q_branch.
148 uint8_t level; //FIXME merge into type?
/**
 * Stand-in neighbour: decode_q_branch points left/top at this block when
 * x or y is 0. All three colour components are 128.
 */
151 static const BlockNode null_block= { //FIXME add border maybe
152 .color= {128,128,128},
// Base block size: alloc_blocks rounds the picture dimensions up to a multiple
// of 1<<LOG2_MB_SIZE, and the predict_slice functions derive block_size from
// MB_SIZE >> block_max_depth.
160 #define LOG2_MB_SIZE 4
161 #define MB_SIZE (1<<LOG2_MB_SIZE)
// NOTE(review): purpose not evident from the code in this part of the file;
// presumably extra precision bits on the encoder side - confirm.
162 #define ENCODER_EXTRA_BITS 4
/**
 * Entry of a per-subband coefficient list. Code in this file uses the members
 * x (column; the value width+1 is written as an end marker by unpack_coeffs)
 * and coeff (packed value consumed by decode_subband_slice_buffered).
 */
165 typedef struct x_and_coeff{
/**
 * One subband of a plane. Besides the members listed here, code in this file
 * reads width, height, ibuf, buf_x_offset and buf_y_offset.
 */
170 typedef struct SubBand{
175 int qlog; ///< log(qscale)/log[2^(1/6)]
180 int stride_line; ///< Stride measured in lines, not pixels.
181 x_and_coeff * x_coeff; ///< coefficient list filled by unpack_coeffs
182 struct SubBand *parent; ///< NOTE(review): presumably the matching band of the next coarser level - confirm
183 uint8_t state[/*7*2*/ 7 + 512][32]; ///< range coder states; set to MID_STATE by reset_contexts
/** Per-plane state; code in this file also reads the member fast_mc. */
186 typedef struct Plane{
189 SubBand band[MAX_DECOMPOSITIONS][4]; ///< indexed as band[level][orientation]
192 int8_t hcoeff[HTAPS_MAX/2]; ///< filter taps used by mc_block when fast_mc is not set
197 int8_t last_hcoeff[HTAPS_MAX/2]; ///< NOTE(review): presumably hcoeff of the previous frame - confirm
/**
 * Codec state passed to the functions in this file. Besides the members
 * listed here, code in this file reads c (range coder), dsp, dwt, block,
 * b_width, b_height, block_max_depth, ref_frames, mv_scale, keyframe, qlog,
 * qbias and scratchbuf.
 */
201 typedef struct SnowContext{
203 AVCodecContext *avctx;
208 AVFrame input_picture; ///< new_picture with the internal linesizes
209 AVFrame current_picture; // destination of predict_slice / predict_slice_buffered
210 AVFrame last_picture[MAX_REF_FRAMES]; // reference frames, indexed by block->ref in pred_block
211 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
212 AVFrame mconly_picture;
213 // uint8_t q_context[16];
214 uint8_t header_state[32]; // range coder states used by decode_qlogs and GET_S
// States used by decode_q_branch: [4 + s_context] split decision,
// [1 + left->type + top->type] block type, [32]/[64]/[96] colour deltas,
// [128 + 32*k] motion vector and reference index symbols.
215 uint8_t block_state[128 + 32*128];
219 int spatial_decomposition_type;
220 int last_spatial_decomposition_type;
221 int temporal_decomposition_type;
222 int spatial_decomposition_count; // number of levels iterated by decode_qlogs
223 int last_spatial_decomposition_count;
224 int temporal_decomposition_count;
227 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
228 uint32_t *ref_scores[MAX_REF_FRAMES];
229 DWTELEM *spatial_dwt_buffer;
230 IDWTELEM *spatial_idwt_buffer;
234 int spatial_scalability;
// Shift applied to qbias*qmul when computing qadd in the dequantisation functions.
244 #define QBIAS_SHIFT 3
248 int last_block_max_depth;
249 Plane plane[MAX_PLANES];
251 #define ME_CACHE_SIZE 1024
252 int me_cache[ME_CACHE_SIZE];
253 int me_cache_generation;
256 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
262 // Avoid a name clash on SGI IRIX
// Right shift applied after multiplying a coefficient by qmul (see
// decode_subband_slice_buffered and dequantize_slice_buffered).
265 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
// Quantiser mantissa table: the dequantisation functions compute
// qmul = qexp[qlog&(QROOT-1)] << (qlog>>QSHIFT).
266 static uint8_t qexp[QROOT];
/**
 * Write the integer v with the range coder c.
 *
 * State usage (matching get_symbol): state[0] is the zero flag, state[1..10]
 * code the exponent e = av_log2(|v|) as a run of 1s closed by a 0,
 * state[22..31] code the bits of |v| below its top bit, and state[11..21]
 * code the sign.
 *
 * @param c         range coder to write to
 * @param state     adaptive states; indices 0..31 are used
 * @param v         value to write
 * @param is_signed NOTE(review): presumably enables the sign bit, mirroring
 *                  the is_signed test in get_symbol - confirm
 */
268 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
272 const int a= FFABS(v);
273 const int e= av_log2(a);
// el caps the exponent so the state index stays within its range.
274 const int el= FFMIN(e, 10);
275 put_rac(c, state+0, 0);
278 put_rac(c, state+1+i, 1); //1..10
281 put_rac(c, state+1+9, 1); //1..10
283 put_rac(c, state+1+FFMIN(i,9), 0);
// Mantissa bits at or above position el all share state 22+9.
285 for(i=e-1; i>=el; i--){
286 put_rac(c, state+22+9, (a>>i)&1); //22..31
289 put_rac(c, state+22+i, (a>>i)&1); //22..31
293 put_rac(c, state+11 + el, v < 0); //11..21
295 put_rac(c, state+0, 1);
/**
 * Read an integer written by put_symbol from the range coder c.
 *
 * @param c         range coder to read from
 * @param state     adaptive states; indices 0..31 are used
 * @param is_signed when zero, no sign bit is read from the stream
 * @return the decoded value
 */
299 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
// state[0] is the zero flag.
300 if(get_rac(c, state+0))
// Count the exponent; the state index is clamped to 1+9 for large e.
305 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
// Shift one mantissa bit at a time into a (a = 2*a + bit).
310 for(i=e-1; i>=0; i--){
311 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
// e becomes -1 if a sign bit was read and set, otherwise 0.
314 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
/**
 * Write v with the range coder c using a code parameterised by log2
 * (counterpart of get_symbol2).
 *
 * state[4+log2] carries the continuation flags (1 = continue, 0 = stop) and
 * state[31-i] carries bit i of the remainder.
 *
 * @param log2 initial bit count; negative values are treated as a step of 1
 */
319 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
// Step size 2^log2, or 1 when log2 is negative.
321 int r= log2>=0 ? 1<<log2 : 1;
327 put_rac(c, state+4+log2, 1);
332 put_rac(c, state+4+log2, 0);
// Remaining low bits, most significant first.
334 for(i=log2-1; i>=0; i--){
335 put_rac(c, state+31-i, (v>>i)&1);
/**
 * Read a value written by put_symbol2 from the range coder c.
 *
 * @param log2 initial bit count; callers in this file pass 0, 3, -4 and
 *             context-4
 * @return the decoded value
 */
339 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
// Step size 2^log2, or 1 when log2 is negative.
341 int r= log2>=0 ? 1<<log2 : 1;
// Continuation flags are read from state[4+log2].
346 while(get_rac(c, state+4+log2)){
// Low bits, most significant first; bit i uses state[31-i].
352 for(i=log2-1; i>=0; i--){
353 v+= get_rac(c, state+31-i)<<i;
/**
 * Decode the coefficients of subband b from the range coder s->c into the
 * list b->x_coeff.
 *
 * Each stored entry holds a column x and a packed value v = 2*magnitude + bit,
 * where the extra bit is read with its own context. A terminating entry with
 * x == w+1 is appended as an end marker. Zero runs are signalled through
 * runs/run, and the coding context of a coefficient is derived from already
 * decoded neighbours (l, lt, t, rt) and the parent band value (p).
 *
 * @param parent      parent subband, or NULL
 * @param orientation NOTE(review): not used by the statements documented
 *                    here - confirm whether it is needed
 */
359 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
360 const int w= b->width;
361 const int h= b->height;
// xc is the write cursor; prev_xc walks the list of the previous line.
365 x_and_coeff *xc= b->x_coeff;
366 x_and_coeff *prev_xc= NULL;
367 x_and_coeff *prev2_xc= xc;
368 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
369 x_and_coeff *prev_parent_xc= parent_xc;
// Number of runs, then the length of the first run.
371 runs= get_symbol2(&s->c, b->state[30], 0);
372 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
379 if(y && prev_xc->x == 0){
391 if(prev_xc->x == x + 1)
// The parent band is addressed with x>>1.
397 if(x>>1 > parent_xc->x){
400 if(x>>1 == parent_xc->x){
// Any non-zero neighbour: code a significance flag with a neighbour-derived context.
404 if(/*ll|*/l|lt|t|rt|p){
405 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
407 v=get_rac(&s->c, &b->state[0][context]);
// Packed value: twice (magnitude), plus one bit whose context depends on
// quant3bA of the left and top neighbours.
409 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
410 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
// Fetch the next run length, if any runs remain.
417 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
// Same packing as above, using the context-0 states.
419 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
420 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
// Limit how far x may skip ahead: by the next entry of the previous line,
// the line width, and the next parent entry.
429 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
430 else max_run= FFMIN(run, w-x-1);
432 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
438 (xc++)->x= w+1; //end marker
// Advance the parent cursor to its end marker (parent->width+1).
444 while(parent_xc->x != parent->width+1)
447 prev_parent_xc= parent_xc;
449 parent_xc= prev_parent_xc;
454 (xc++)->x= w+1; //end marker
/**
 * Expand the coefficient list b->x_coeff into slice buffer lines for rows
 * start_y .. h-1, dequantising each coefficient on the way.
 *
 * @param sb         slice buffer providing the destination lines
 * @param start_y    first row to write
 * @param h          one past the last row to write
 * @param save_state save_state[0] holds the index into b->x_coeff reached by
 *                   the previous call and is updated before returning
 */
457 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
458 const int w= b->width;
// Quantiser: qlog is clipped to [0, QROOT*16]; qmul is the table mantissa
// shifted by the exponent part of qlog; qadd is the bias term.
460 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
461 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
462 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
465 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
470 /* If we are on the second or later slice, restore our index. */
472 new_index = save_state[0];
475 for(y=start_y; y<h; y++){
// Clear the destination line, then read the first list entry for it.
478 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
479 memset(line, 0, b->width*sizeof(IDWTELEM));
480 v = b->x_coeff[new_index].coeff;
481 x = b->x_coeff[new_index++].x;
// v>>1 is the magnitude; u is 0 or -1 depending on the low bit of v.
483 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
484 register int u= -(v&1);
487 v = b->x_coeff[new_index].coeff;
488 x = b->x_coeff[new_index++].x;
492 /* Save our variables for the next slice. */
493 save_state[0] = new_index;
/**
 * Set all range coder states to MID_STATE: the subband states of planes
 * 0..2, the header states and the block states.
 */
498 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
499 int plane_index, level, orientation;
501 for(plane_index=0; plane_index<3; plane_index++){
502 for(level=0; level<MAX_DECOMPOSITIONS; level++){
// Orientation 0 is visited at level 0 only; other levels start at 1.
503 for(orientation=level ? 1:0; orientation<4; orientation++){
504 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
508 memset(s->header_state, MID_STATE, sizeof(s->header_state));
509 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/**
 * Allocate the zero-initialised block array s->block, sized from the picture
 * dimensions in s->avctx and s->block_max_depth.
 *
 * NOTE(review): the statements documented here do not test the av_mallocz()
 * result; confirm that an allocation failure is reported to the caller.
 * NOTE(review): the size expression multiplies and shifts without an
 * overflow check; confirm the dimensions are validated beforehand.
 */
512 static int alloc_blocks(SnowContext *s){
// Ceiling division by MB_SIZE, written as a negated shift of the negated
// value (relies on arithmetic right shift of negative ints).
513 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
514 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
// One BlockNode per macro-block, times 4^block_max_depth.
520 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/**
 * Store one set of block parameters into every finest-level entry of s->block
 * covered by the block at (x, y) of the given level.
 *
 * @param level tree level of the block; 0 is the coarsest
 * @param x,y   block coordinates at that level
 * @param l,cb,cr,mx,my,ref,type values for the stored BlockNode
 */
524 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
// Row length of s->block at the finest level.
525 const int w= s->b_width << s->block_max_depth;
526 const int rem_depth= s->block_max_depth - level;
// Top-left entry of the covered square, and its side length.
527 const int index= (x + y*w) << rem_depth;
528 const int block_w= 1<<rem_depth;
541 for(j=0; j<block_w; j++){
542 for(i=0; i<block_w; i++){
543 s->block[index + i + j*w]= block;
/**
 * Store source and reference plane pointers in the motion estimation context:
 * c->src[0][i] receives src[i] and c->ref[0][i] receives ref[i] advanced by a
 * per-plane offset computed from (x, y).
 *
 * NOTE(review): how ref2 and ref_index are used is not evident from the
 * statements documented here - confirm.
 */
548 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
549 const int offset[3]= {
// Planes 1 and 2 use the uvstride-based offset, halved.
551 ((y*c->uvstride + x)>>1),
552 ((y*c->uvstride + x)>>1),
556 c->src[0][i]= src [i];
557 c->ref[0][i]= ref [i] + offset[i];
/**
 * Predict a motion vector as the component-wise median of the left, top and
 * top-right neighbours.
 *
 * With a single reference frame the neighbour vectors are used as they are.
 * Otherwise each neighbour vector is first multiplied by
 * scale_mv_ref[ref][neighbour->ref], rounded (+128) and shifted right by 8.
 *
 * @param[out] mx,my predicted vector
 * @param ref        reference index the prediction is made for
 */
562 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
563 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
564 if(s->ref_frames == 1){
565 *mx = mid_pred(left->mx, top->mx, tr->mx);
566 *my = mid_pred(left->my, top->my, tr->my);
// Row of scale factors for the target reference.
568 const int *scale = scale_mv_ref[ref];
569 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
570 (top ->mx * scale[top ->ref] + 128) >>8,
571 (tr ->mx * scale[tr ->ref] + 128) >>8);
572 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
573 (top ->my * scale[top ->ref] + 128) >>8,
574 (tr ->my * scale[tr ->ref] + 128) >>8);
/**
 * Test whether two blocks carry the same prediction parameters.
 *
 * If both are intra, their three colour components are compared. Otherwise
 * mx, my, ref and the BLOCK_INTRA bit of type are compared.
 *
 * @return 1 if all compared values are equal, 0 otherwise
 */
578 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
579 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
// OR-ing the differences is zero only if every difference is zero.
580 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
582 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/**
 * Decode one node of the block tree at (x, y) on the given level.
 *
 * A node is decoded as a leaf when level equals s->block_max_depth or the
 * split flag read from the range coder says so; otherwise the function
 * recurses into the four children at level+1. For a leaf, the block type,
 * then either colour deltas or a reference index and motion vector deltas
 * are read, and the result is stored with set_blocks.
 */
586 static void decode_q_branch(SnowContext *s, int level, int x, int y){
587 const int w= s->b_width << s->block_max_depth;
588 const int rem_depth= s->block_max_depth - level;
589 const int index= (x + y*w) << rem_depth;
// Column just right of this block, in finest-level units.
590 int trx= (x+1)<<rem_depth;
// Neighbours; null_block stands in on the left and top picture borders.
591 const BlockNode *left = x ? &s->block[index-1] : &null_block;
592 const BlockNode *top = y ? &s->block[index-w] : &null_block;
593 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
594 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// Context for the split flag, built from the neighbours' levels.
595 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): presumably the keyframe case, where every block is stored as
// an intra block with null_block's values - confirm the guarding condition.
598 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
602 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
// The left neighbour's colour is the predictor for the colour deltas.
604 int l = left->color[0];
605 int cb= left->color[1];
606 int cr= left->color[2];
// Contexts from the neighbours' reference indices and vector differences.
// The tr terms are multiplied by 0 and do not contribute.
608 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
609 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
610 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
612 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
// Colour path: vector predicted for reference 0, then three signed deltas.
615 pred_mv(s, &mx, &my, 0, left, top, tr);
616 l += get_symbol(&s->c, &s->block_state[32], 1);
617 cb+= get_symbol(&s->c, &s->block_state[64], 1);
618 cr+= get_symbol(&s->c, &s->block_state[96], 1);
// Motion path: the reference index is coded only with several reference frames.
620 if(s->ref_frames > 1)
621 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
622 pred_mv(s, &mx, &my, ref, left, top, tr);
623 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
624 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
626 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
// Split: decode the four children (left to right, top to bottom).
628 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
629 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
630 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
631 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/** Decode block trees by calling decode_q_branch at level 0 for block position (x, y). */
635 static void decode_blocks(SnowContext *s){
642 decode_q_branch(s, 0, x, y);
/**
 * Sub-pixel interpolation of a b_w x b_h block from src into dst.
 *
 * Filtered planes are produced with a horizontal pass, a vertical pass and a
 * pass over the horizontal intermediate tmpI; they are reached through the
 * hpel[] pointers. The output is then blended from those planes either
 * bilinearly from four of them or as a weighted sum of two.
 *
 * @param p      plane whose hcoeff[] supplies the filter taps; when p is NULL
 *               or p->fast_mc is set, fixed taps (20, -5, 1) are used instead
 * @param stride line size of src and dst
 * @param dx,dy  sub-pixel position, each below 16
 *
 * NOTE(review): tmp2t is a variable length array whose size grows with
 * stride; confirm stack usage is acceptable for the largest stride.
 */
647 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
648 static const uint8_t weight[64]={
// Indexed by dx + 16*dy. The high nibble selects the first plane (l) and the
// low nibble the second plane (r) for the two-plane blend.
659 static const uint8_t brane[256]={
660 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
661 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
662 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
663 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
664 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
665 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
666 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
667 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
668 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
669 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
670 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
671 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
672 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
673 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
674 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
675 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
// Indexed by a plane number; the entries for l and r are OR-ed into b.
678 static const uint8_t needs[16]={
// tmpIt holds unrounded horizontal filter output with a row pitch of 64.
686 int16_t tmpIt [64*(32+HTAPS_MAX)];
687 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
688 int16_t *tmpI= tmpIt;
689 uint8_t *tmp2= tmp2t[0];
690 const uint8_t *hpel[11];
691 assert(dx<16 && dy<16);
692 r= brane[dx + 16*dy]&15;
693 l= brane[dx + 16*dy]>>4;
695 b= needs[l] | needs[r];
// Horizontal pass over b_h+HTAPS_MAX-1 rows so the later vertical filtering
// has the rows it needs above and below the block.
700 for(y=0; y < b_h+HTAPS_MAX-1; y++){
701 for(x=0; x < b_w; x++){
702 int a_1=src[x + HTAPS_MAX/2-4];
703 int a0= src[x + HTAPS_MAX/2-3];
704 int a1= src[x + HTAPS_MAX/2-2];
705 int a2= src[x + HTAPS_MAX/2-1];
706 int a3= src[x + HTAPS_MAX/2+0];
707 int a4= src[x + HTAPS_MAX/2+1];
708 int a5= src[x + HTAPS_MAX/2+2];
709 int a6= src[x + HTAPS_MAX/2+3];
711 if(!p || p->fast_mc){
// Fixed symmetric 6-tap filter.
712 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
// Symmetric 8-tap filter with the plane's taps.
716 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
// Saturate: negative values become 0, values above 255 become all ones
// (255 once truncated to a byte).
721 if(am&(~255)) am= ~(am>>31);
730 src += HTAPS_MAX/2 - 1;
// Vertical pass on the source pixels; b_w+1 columns are produced.
734 for(y=0; y < b_h; y++){
735 for(x=0; x < b_w+1; x++){
736 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
737 int a0= src[x + (HTAPS_MAX/2-3)*stride];
738 int a1= src[x + (HTAPS_MAX/2-2)*stride];
739 int a2= src[x + (HTAPS_MAX/2-1)*stride];
740 int a3= src[x + (HTAPS_MAX/2+0)*stride];
741 int a4= src[x + (HTAPS_MAX/2+1)*stride];
742 int a5= src[x + (HTAPS_MAX/2+2)*stride];
743 int a6= src[x + (HTAPS_MAX/2+3)*stride];
// Rounded and normalised: the fixed taps sum to 32, hence +16 and >>5;
// the plane taps are normalised with +32 and >>6.
746 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
748 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
750 if(am&(~255)) am= ~(am>>31);
758 src += stride*(HTAPS_MAX/2 - 1);
// Vertical pass on the unrounded horizontal output, i.e. filtered in both
// directions; the normalisation shift is therefore doubled.
762 for(y=0; y < b_h; y++){
763 for(x=0; x < b_w; x++){
764 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
765 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
766 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
767 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
768 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
769 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
770 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
771 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
774 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
776 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
777 if(am&(~255)) am= ~(am>>31);
// Plane pointers; several entries are the same plane shifted by one pixel
// or one line.
786 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
791 hpel[ 6]= tmp2t[1] + 1;
793 hpel[ 8]= src + stride;
794 hpel[ 9]= hpel[1] + stride;
795 hpel[10]= hpel[8] + 1;
// Four neighbouring planes around the sub-pixel position (dx/8, dy/8 select
// the cell).
798 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
799 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
800 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
801 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
// Bilinear blend with weights summing to 64.
804 for(y=0; y < b_h; y++){
805 for(x=0; x < b_w; x++){
806 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
807 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
// Two-plane blend of the planes chosen through brane[].
816 const uint8_t *src1= hpel[l];
817 const uint8_t *src2= hpel[r];
818 int a= weight[((dx&7) + (8*(dy&7)))];
820 for(y=0; y < b_h; y++){
821 for(x=0; x < b_w; x++){
822 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
/**
 * Define the function mc_block_hpel<dx><dy><b_w>(dst, src, stride, h), which
 * calls mc_block with a NULL plane (selecting the fixed filter taps), a
 * b_w x b_w block and src moved back by HTAPS_MAX/2-1 pixels and lines.
 */
831 #define mca(dx,dy,b_w)\
832 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
834 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
/**
 * Produce the prediction of one block into dst.
 *
 * Intra blocks are filled with block->color[plane_index]. Other blocks are
 * fetched from s->last_picture[block->ref] at the position displaced by the
 * scaled motion vector and interpolated with mc_block or the h264 qpel
 * functions from s->dsp.
 *
 * @param tmp    scratch area used when the reference block crosses the
 *               picture edge
 * @param stride line size of dst and of the reference plane
 * @param sx,sy  block position in the plane
 * @param w,h    plane dimensions used for the edge test
 *
 * NOTE(review): the intra fill writes through uint32_t* casts of a uint8_t
 * buffer; this assumes suitable alignment of dst and is a type-punned
 * access - confirm both are acceptable for the supported targets.
 */
846 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
847 if(block->type & BLOCK_INTRA){
849 const int color = block->color[plane_index];
// The colour byte replicated into all four bytes of a 32-bit word.
850 const int color4= color*0x01010101;
// Eight word stores per row: 32 bytes.
852 for(y=0; y < b_h; y++){
853 *(uint32_t*)&dst[0 + y*stride]= color4;
854 *(uint32_t*)&dst[4 + y*stride]= color4;
855 *(uint32_t*)&dst[8 + y*stride]= color4;
856 *(uint32_t*)&dst[12+ y*stride]= color4;
857 *(uint32_t*)&dst[16+ y*stride]= color4;
858 *(uint32_t*)&dst[20+ y*stride]= color4;
859 *(uint32_t*)&dst[24+ y*stride]= color4;
860 *(uint32_t*)&dst[28+ y*stride]= color4;
// Four word stores per row: 16 bytes.
863 for(y=0; y < b_h; y++){
864 *(uint32_t*)&dst[0 + y*stride]= color4;
865 *(uint32_t*)&dst[4 + y*stride]= color4;
866 *(uint32_t*)&dst[8 + y*stride]= color4;
867 *(uint32_t*)&dst[12+ y*stride]= color4;
// Two word stores per row: 8 bytes.
870 for(y=0; y < b_h; y++){
871 *(uint32_t*)&dst[0 + y*stride]= color4;
872 *(uint32_t*)&dst[4 + y*stride]= color4;
// One word store per row: 4 bytes.
875 for(y=0; y < b_h; y++){
876 *(uint32_t*)&dst[0 + y*stride]= color4;
// Byte-wise fill for any b_w.
879 for(y=0; y < b_h; y++){
880 for(x=0; x < b_w; x++){
881 dst[x + y*stride]= color;
886 uint8_t *src= s->last_picture[block->ref].data[plane_index];
// Plane 0 uses twice the vector scale of the other planes.
887 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
888 int mx= block->mx*scale;
889 int my= block->my*scale;
892 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
// Integer part of the vector, moved back by the filter margin.
893 sx += (mx>>4) - (HTAPS_MAX/2-1);
894 sy += (my>>4) - (HTAPS_MAX/2-1);
895 src += sx + sy*stride;
// The unsigned casts make negative positions fail the test as well.
896 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
897 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
898 s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
901 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
902 // assert(!(b_w&(b_w-1)));
903 assert(b_w>1 && b_h>1);
904 assert((tab_index>=0 && tab_index<4) || b_w==32);
// mc_block handles every case the qpel functions cannot: positions that are
// not multiples of 4, unsupported block shapes, or fast_mc disabled.
905 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
906 mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
// Two 16-wide calls per 16 rows (columns 0 and 16).
909 for(y=0; y<b_h; y+=16){
910 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
911 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 3 + 16 + (y+3)*stride,stride);
// Single call covering the block.
914 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
// Two calls side by side, the second offset by b_h columns.
916 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
917 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
// Two calls stacked vertically, the second offset by b_w rows.
920 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
921 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
/**
 * Blend four block predictions with the weight window obmc and combine the
 * result with the slice buffer line of each row.
 *
 * @param obmc        weight window; its four quadrants weight block[3],
 *                    block[2], block[1] and block[0] respectively
 * @param obmc_stride line size of the window
 * @param block       the four prediction buffers
 * @param src_stride  line size of the prediction buffers and of dst8
 * @param sb          slice buffer; line src_y + y is used for row y
 * @param dst8        8-bit output; written after rounding and clamping
 */
926 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
927 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
930 for(y=0; y<b_h; y++){
931 //FIXME ugly misuse of obmc_stride
// Quadrant pointers: top-left, top-right, bottom-left, bottom-right.
932 const uint8_t *obmc1= obmc + y*obmc_stride;
933 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
934 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
935 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
936 dst = slice_buffer_get_line(sb, src_y + y);
937 for(x=0; x<b_w; x++){
938 int v= obmc1[x] * block[3][x + y*src_stride]
939 +obmc2[x] * block[2][x + y*src_stride]
940 +obmc3[x] * block[1][x + y*src_stride]
941 +obmc4[x] * block[0][x + y*src_stride];
943 v <<= 8 - LOG2_OBMC_MAX;
// Round, drop the fractional bits and saturate to 0..255.
949 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
950 if(v&(~255)) v= ~(v>>31);
951 dst8[x + y*src_stride] = v;
959 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Build the weighted prediction for one block position and add it to, or
 * subtract it from, the wavelet domain buffer.
 *
 * The predictions of up to four neighbouring blocks (lt, rt, lb, rb) are
 * computed with pred_block; a prediction is reused when same_block reports
 * that two of them are identical. The block rectangle is clipped against the
 * plane before use.
 *
 * @param sliced      selects the slice buffer path (s->dwt.inner_add_yblock)
 * @param dst         wavelet domain buffer for the non-sliced path
 * @param dst8        8-bit output picture
 * @param obmc        weight window with line size obmc_stride
 * @param src_x,src_y position of the block rectangle in the plane
 * @param w,h         plane dimensions
 * @param b_x,b_y     block coordinates in s->block
 * @param add         NOTE(review): presumably non-zero adds the prediction
 *                    and writes dst8, zero subtracts it from dst - confirm
 * @param offset_dst  whether dst is advanced to the block position
 */
960 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
961 const int b_width = s->b_width << s->block_max_depth;
962 const int b_height= s->b_height << s->block_max_depth;
963 const int b_stride= b_width;
964 BlockNode *lt= &s->block[b_x + b_y*b_stride];
966 BlockNode *lb= lt+b_stride;
969 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
970 uint8_t *tmp = s->scratchbuf;
// Block positions on the right/bottom border of the block array.
977 }else if(b_x + 1 >= b_width){
984 }else if(b_y + 1 >= b_height){
// Clip the rectangle against the plane on all four sides.
989 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
992 if(!sliced && !offset_dst)
995 }else if(src_x + b_w > w){
999 obmc -= src_y*obmc_stride;
1001 if(!sliced && !offset_dst)
1002 dst -= src_y*dst_stride;
1004 }else if(src_y + b_h> h){
// Nothing left after clipping.
1008 if(b_w<=0 || b_h<=0) return;
1010 assert(src_stride > 2*MB_SIZE + 5);
1012 if(!sliced && offset_dst)
1013 dst += src_x + src_y*dst_stride;
1014 dst8+= src_x + src_y*src_stride;
1015 // src += src_x + src_y*src_stride;
1017 ptmp= tmp + 3*tmp_step;
1020 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
// Each further prediction is computed only if no earlier identical block exists.
1022 if(same_block(lt, rt)){
1027 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1030 if(same_block(lt, lb)){
1032 }else if(same_block(rt, lb)){
1037 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1040 if(same_block(lt, rb) ){
1042 }else if(same_block(rt, rb)){
1044 }else if(same_block(lb, rb)){
1048 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// Slice buffer path.
1051 s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
// Plain buffer path: same weighting as ff_snow_inner_add_yblock.
1053 for(y=0; y<b_h; y++){
1054 //FIXME ugly misuse of obmc_stride
1055 const uint8_t *obmc1= obmc + y*obmc_stride;
1056 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1057 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1058 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1059 for(x=0; x<b_w; x++){
1060 int v= obmc1[x] * block[3][x + y*src_stride]
1061 +obmc2[x] * block[2][x + y*src_stride]
1062 +obmc3[x] * block[1][x + y*src_stride]
1063 +obmc4[x] * block[0][x + y*src_stride];
1065 v <<= 8 - LOG2_OBMC_MAX;
1067 v >>= 8 - FRAC_BITS;
// Add the residual, round, saturate to 0..255 and write the pixel.
1070 v += dst[x + y*dst_stride];
1071 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1072 if(v&(~255)) v= ~(v>>31);
1073 dst8[x + y*src_stride] = v;
// Remove the prediction from the buffer.
1075 dst[x + y*dst_stride] -= v;
/**
 * Apply block prediction to one row of blocks (mb_y) of a plane, working on
 * slice buffer lines.
 *
 * On keyframes, or when bit 512 of avctx->debug is set, no block prediction
 * is used: the lines are either converted to 8-bit pixels with a +128
 * offset, or the 128 offset is removed from them. Otherwise add_yblock is
 * called for every block column, including one extra column at the end.
 *
 * @param add NOTE(review): presumably selects adding the prediction
 *            (decoding) versus subtracting it - confirm
 */
1082 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
1083 Plane *p= &s->plane[plane_index];
1084 const int mb_w= s->b_width << s->block_max_depth;
1085 const int mb_h= s->b_height << s->block_max_depth;
// Planes other than 0 use half the block width and the next smaller window.
1087 int block_size = MB_SIZE >> s->block_max_depth;
1088 int block_w = plane_index ? block_size/2 : block_size;
1089 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1090 int obmc_stride= plane_index ? block_size : 2*block_size;
1091 int ref_stride= s->current_picture.linesize[plane_index];
1092 uint8_t *dst8= s->current_picture.data[plane_index];
1096 if(s->keyframe || (s->avctx->debug&512)){
// Rows of this block row, limited to the plane height.
1101 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1102 // DWTELEM * line = slice_buffer_get_line(sb, y);
1103 IDWTELEM * line = sb->line[y];
1105 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
// Add the 128 offset and the rounding term in fixed point.
1106 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
// Saturate to 0..255.
1108 if(v&(~255)) v= ~(v>>31);
1109 dst8[x + y*ref_stride]= v;
1113 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1114 // DWTELEM * line = slice_buffer_get_line(sb, y);
1115 IDWTELEM * line = sb->line[y];
1117 line[x] -= 128 << FRAC_BITS;
1118 // buf[x + y*w]-= 128<<FRAC_BITS;
// mb_x runs to mb_w inclusive; block rectangles are shifted by half a block.
1126 for(mb_x=0; mb_x<=mb_w; mb_x++){
1127 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
1128 block_w*mb_x - block_w/2,
1129 block_w*mb_y - block_w/2,
1132 w, ref_stride, obmc_stride,
1134 add, 0, plane_index);
/**
 * Apply block prediction to one row of blocks (mb_y) of a plane, working on
 * the plain buffer buf with line size w.
 *
 * On keyframes, or when bit 512 of avctx->debug is set, no block prediction
 * is used: buf is either converted to 8-bit pixels with a +128 offset, or
 * the 128 offset is removed from it. Otherwise add_yblock is called for
 * every block column, including one extra column at the end.
 *
 * @param add NOTE(review): presumably selects adding the prediction
 *            (decoding) versus subtracting it - confirm
 */
1138 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
1139 Plane *p= &s->plane[plane_index];
1140 const int mb_w= s->b_width << s->block_max_depth;
1141 const int mb_h= s->b_height << s->block_max_depth;
// Planes other than 0 use half the block width and the next smaller window.
1143 int block_size = MB_SIZE >> s->block_max_depth;
1144 int block_w = plane_index ? block_size/2 : block_size;
1145 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1146 const int obmc_stride= plane_index ? block_size : 2*block_size;
1147 int ref_stride= s->current_picture.linesize[plane_index];
1148 uint8_t *dst8= s->current_picture.data[plane_index];
1152 if(s->keyframe || (s->avctx->debug&512)){
// Rows of this block row, limited to the plane height.
1157 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
// Add the 128 offset and the rounding term in fixed point.
1159 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
// Saturate to 0..255.
1161 if(v&(~255)) v= ~(v>>31);
1162 dst8[x + y*ref_stride]= v;
1166 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1168 buf[x + y*w]-= 128<<FRAC_BITS;
// mb_x runs to mb_w inclusive; block rectangles are shifted by half a block.
1176 for(mb_x=0; mb_x<=mb_w; mb_x++){
1177 add_yblock(s, 0, NULL, buf, dst8, obmc,
1178 block_w*mb_x - block_w/2,
1179 block_w*mb_y - block_w/2,
1182 w, ref_stride, obmc_stride,
1184 add, 1, plane_index);
/**
 * Run predict_slice for every block row of a plane. mb_y runs from 0 to mb_h
 * inclusive, i.e. one more row than the block array has.
 */
1188 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
1189 const int mb_h= s->b_height << s->block_max_depth;
1191 for(mb_y=0; mb_y<=mb_h; mb_y++)
1192 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Dequantise rows start_y .. end_y-1 of subband b in place in the slice
 * buffer. Does nothing when s->qlog equals LOSSLESS_QLOG.
 *
 * Each value i becomes (|i|*qmul + qadd) >> QEXPSHIFT with the sign of i
 * restored.
 */
1195 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
1196 const int w= b->width;
// Same quantiser derivation as in decode_subband_slice_buffered.
1197 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1198 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1199 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1202 if(s->qlog == LOSSLESS_QLOG) return;
1204 for(y=start_y; y<end_y; y++){
1205 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1206 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// Negative branch: dequantise the magnitude, then negate.
1210 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1212 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
// Adds a spatial predictor to each coefficient of rows start_y .. end_y-1 of subband b,
// working on lines obtained from the slice buffer sb.
// Visible predictors: median of (left, top, top-right), median of (left, top, left+top-topleft),
// the plain left neighbour, and the plain top neighbour (only when y != 0).
// NOTE(review): the branches choosing between them (inverse, use_median, x position) are not
// visible in this excerpt; src and stride only appear in a commented-out line.
1218 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
1219 const int w= b->width;
1222 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
// Fetch the row above start_y. NOTE(review): presumably guarded by a start_y check and later
// used as prev - confirm against the full file.
1226 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1228 for(y=start_y; y<end_y; y++){
1230 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1231 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1235 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
1236 else line[x] += line[x - 1];
1238 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
1239 else line[x] += line[x - 1];
1242 if(y) line[x] += prev[x];
// Fills the qlog of every subband of all three planes.
// Plane 2 copies the values of plane 1, orientation 2 copies orientation 1 of the same plane
// and level, everything else is read from the range coder with get_symbol().
// Level 0 uses orientations 0..3, higher levels use 1..3.
1248 static void decode_qlogs(SnowContext *s){
1249 int plane_index, level, orientation;
1251 for(plane_index=0; plane_index<3; plane_index++){
1252 for(level=0; level<s->spatial_decomposition_count; level++){
1253 for(orientation=level ? 1:0; orientation<4; orientation++){
1255 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1256 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1257 else q= get_symbol(&s->c, s->header_state, 1);
1258 s->plane[plane_index].band[level][orientation].qlog= q;
// GET_S(dst, check): reads a symbol into tmp with get_symbol(); the visible part logs an error
// naming dst together with the value read.
// NOTE(review): the lines that evaluate check and assign dst are not visible in this excerpt.
1264 #define GET_S(dst, check) \
1265 tmp= get_symbol(&s->c, s->header_state, 0);\
1267 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
// Parses the per-frame header from the range coder s->c into the context s.
// The keyframe flag is read first; on keyframes (or when always_reset is set) a group of fields
// is reset and, further down, the stream parameters are read. The tail of the function adds
// values read with get_symbol(..., 1) to decomposition type, qlog, mv_scale, qbias and
// block_max_depth, with range checks on some of them.
// NOTE(review): several lines (error returns, some conditions) are missing from this excerpt,
// so the exact nesting of the statements below cannot be confirmed here.
1272 static int decode_header(SnowContext *s){
1273 int plane_index, tmp;
// kstate is initialised to MID_STATE and used as the context for the keyframe bit.
1276 memset(kstate, MID_STATE, sizeof(kstate));
1278 s->keyframe= get_rac(&s->c, kstate);
1279 if(s->keyframe || s->always_reset){
1281 s->spatial_decomposition_type=
1285 s->block_max_depth= 0;
// tmp is an int compared against 0U, so the condition is true only for tmp == 0.
1288 GET_S(s->version, tmp <= 0U)
1289 s->always_reset= get_rac(&s->c, s->header_state);
1290 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1291 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1292 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1293 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1294 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1295 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
1296 s->spatial_scalability= get_rac(&s->c, s->header_state);
1297 // s->rate_scalability= get_rac(&s->c, s->header_state);
// With the unsigned comparison a negative tmp can never satisfy the condition.
1298 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
// The stored value is the coded value plus one.
1299 s->max_ref_frames++;
// Optional filter description for planes 0 and 1; plane 2 mirrors plane 1 afterwards.
1305 if(get_rac(&s->c, s->header_state)){
1306 for(plane_index=0; plane_index<2; plane_index++){
1307 int htaps, i, sum=0;
1308 Plane *p= &s->plane[plane_index];
1309 p->diag_mc= get_rac(&s->c, s->header_state);
1310 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
1311 if((unsigned)htaps > HTAPS_MAX || htaps==0)
// Taps are read for i = htaps/2 down to 1, negated for odd i; hcoeff[0] is then chosen so
// that hcoeff[0] plus the taps just read equals 32.
1314 for(i= htaps/2; i; i--){
1315 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
1316 sum += p->hcoeff[i];
1318 p->hcoeff[0]= 32-sum;
1320 s->plane[2].diag_mc= s->plane[1].diag_mc;
1321 s->plane[2].htaps = s->plane[1].htaps;
1322 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
1324 if(get_rac(&s->c, s->header_state)){
1325 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1330 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
1331 if(s->spatial_decomposition_type > 1U){
// NOTE(review): this message and the two further below have no trailing newline, unlike the
// other av_log() calls in this file.
1332 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
// The smaller chroma dimension must still be at least 1 after count-1 halvings.
1335 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
1336 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
1337 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
1341 s->qlog += get_symbol(&s->c, s->header_state, 1);
1342 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
1343 s->qbias += get_symbol(&s->c, s->header_state, 1);
1344 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
// Only depths 0 and 1 are accepted; an invalid value is reset to 0 after logging.
// NOTE(review): the message says too large although negative values also reach this branch.
1345 if(s->block_max_depth > 1 || s->block_max_depth < 0){
1346 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
1347 s->block_max_depth= 0;
// Table initialiser run over i = 0 .. QROOT-1; v is multiplied by 2^(1/QROOT) on every step,
// so successive values grow geometrically and double after QROOT steps.
// NOTE(review): the start value of v and the store (presumably into qexp[]) are not visible here.
1354 static void init_qexp(void){
1358 for(i=0; i<QROOT; i++){
1360 v *= pow(2, 1.0 / QROOT);
// Context initialisation working on avctx->priv_data. NOTE(review): presumably shared by the
// decoder and encoder init callbacks - the callers are not visible in this excerpt.
// Visible work: default max_ref_frames, dsputil and DWT setup, overriding of the s->dsp
// function-pointer tables by the macro bodies below, allocation of the two width*height work
// buffers, the scale_mv_ref[][] table, and the scratch buffer.
// NOTE(review): none of the av_mallocz() / av_malloc() results nor the get_buffer() return
// value is checked in the visible lines.
1364 static av_cold int common_init(AVCodecContext *avctx){
1365 SnowContext *s = avctx->priv_data;
1370 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
1372 dsputil_init(&s->dsp, avctx);
1373 ff_dwt_init(&s->dwt);
1376 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
1377 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
1378 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
1379 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
1380 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
1381 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
1400 #define mcfh(dx,dy)\
1401 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
1402 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
1403 mc_block_hpel ## dx ## dy ## 16;\
1404 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
1405 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
1406 mc_block_hpel ## dx ## dy ## 8;
1416 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
1418 width= s->avctx->width;
1419 height= s->avctx->height;
// One zeroed element per luma pixel in each work buffer.
1421 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
1422 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
// scale_mv_ref[i][j] = 256*(i+1)/(j+1) using integer division.
1424 for(i=0; i<MAX_REF_FRAMES; i++)
1425 for(j=0; j<MAX_REF_FRAMES; j++)
1426 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
// scratchbuf is sized from the luma linesize of the picture obtained on the previous line.
1428 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
1429 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
// Recomputes per-plane and per-subband geometry from values used below: picture size, chroma
// shifts and spatial_decomposition_count.
// For every subband it sets the buffer pointers, stride, width, height and slice-buffer
// offsets, links the parent band, and reallocates x_coeff.
// NOTE(review): the av_mallocz() result for x_coeff is not checked in the visible lines.
1434 static int common_init_after_header(AVCodecContext *avctx){
1435 SnowContext *s = avctx->priv_data;
1436 int plane_index, level, orientation;
1438 for(plane_index=0; plane_index<3; plane_index++){
1439 int w= s->avctx->width;
1440 int h= s->avctx->height;
// NOTE(review): presumably applied to the chroma planes only - the guarding condition is not visible.
1443 w>>= s->chroma_h_shift;
1444 h>>= s->chroma_v_shift;
1446 s->plane[plane_index].width = w;
1447 s->plane[plane_index].height= h;
// Walk from the last level down to level 0; level 0 additionally has orientation 0.
1449 for(level=s->spatial_decomposition_count-1; level>=0; level--){
1450 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1451 SubBand *b= &s->plane[plane_index].band[level][orientation];
1453 b->buf= s->spatial_dwt_buffer;
1455 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
// Width rounds up when bit 0 of orientation is clear, height rounds up for orientations 0 and 1.
1456 b->width = (w + !(orientation&1))>>1;
1457 b->height= (h + !(orientation>1))>>1;
1459 b->stride_line = 1 << (s->spatial_decomposition_count - level);
1460 b->buf_x_offset = 0;
1461 b->buf_y_offset = 0;
// The offsets below depend on the orientation (the conditions are not visible in this excerpt).
1465 b->buf_x_offset = (w+1)>>1;
1468 b->buf += b->stride>>1;
1469 b->buf_y_offset = b->stride_line >> 1;
// ibuf mirrors buf: same element offset, but inside the IDWTELEM buffer.
1471 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
1474 b->parent= &s->plane[plane_index].band[level-1][orientation];
1475 //FIXME avoid this realloc
1476 av_freep(&b->x_coeff);
1477 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
// Accumulates the squared difference between r0 and r1 (both indexed with p->width as row
// pitch) into the score grid.
// Each pixel (x,y) is mapped to a cell (sx,sy): the offsets xo / yo (set in lines not visible
// here) are removed, the result is rounded to the subband step
// 1 << (spatial_decomposition_count - level), then divided by Q2_STEP.
// score is cleared first; score_stride is the number of cells per score row.
1492 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
1493 SubBand *b= &p->band[level][orientation];
1497 int step= 1 << (s->spatial_decomposition_count - level);
1504 //FIXME bias for nonzero ?
1506 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
1507 for(y=0; y<p->height; y++){
1508 for(x=0; x<p->width; x++){
1509 int sx= (x-xo + step/2) / step / Q2_STEP;
1510 int sy= (y-yo + step/2) / step / Q2_STEP;
1511 int v= r0[x + y*p->width] - r1[x + y*p->width];
1512 assert(sx>=0 && sy>=0 && sx < score_stride);
1514 score[sx + sy*score_stride] += v*v;
// Guards against the int accumulator wrapping to a negative value.
1515 assert(score[sx + sy*score_stride] >= 0);
// Dequantizes every subband of plane p inside buffer.
// buffer is addressed like s->spatial_idwt_buffer: each band is located at the same element
// offset that b->ibuf has relative to that buffer.
// width and height are not used in the visible lines.
1520 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
1521 int level, orientation;
1523 for(level=0; level<s->spatial_decomposition_count; level++){
1524 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1525 SubBand *b= &p->band[level][orientation];
1526 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
1528 dequantize(s, b, dst, b->stride);
// Quantisation search for one plane.
// Forward-transforms buffer, quantizes every subband into best_dequant, then for each band and
// each (xs,ys) phase of a Q2_STEP grid tries moving the coefficients one step towards zero and
// keeps the change where the error reported by find_sse() does not exceed the previous best
// by more than threshold. The result is copied to s->spatial_idwt_buffer at the end.
// NOTE(review): best_dequant, idwt2_buffer and the two score arrays are variable-length arrays
// on the stack, sized by the picture dimensions.
// NOTE(review): the action taken for qbias == 0 is not visible in this excerpt.
1533 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
1534 int level, orientation, ys, xs, x, y, pass;
1535 IDWTELEM best_dequant[height * stride];
1536 IDWTELEM idwt2_buffer[height * stride];
1537 const int score_stride= (width + 10)/Q2_STEP;
1538 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1539 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1540 int threshold= (s->m.lambda * s->m.lambda) >> 6;
1542 //FIXME pass the copy cleanly ?
1544 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
1545 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
// Initial quantisation of every subband into best_dequant.
1547 for(level=0; level<s->spatial_decomposition_count; level++){
1548 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1549 SubBand *b= &p->band[level][orientation];
1550 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1551 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
1552 assert(src == b->buf); // code does not depend on this but it is true currently
1554 quantize(s, b, dst, src, b->stride, s->qbias);
// The loop bound is 1, so a single refinement pass is run.
1557 for(pass=0; pass<1; pass++){
1558 if(s->qbias == 0) //keyframe
1560 for(level=0; level<s->spatial_decomposition_count; level++){
1561 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1562 SubBand *b= &p->band[level][orientation];
1563 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
1564 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1566 for(ys= 0; ys<Q2_STEP; ys++){
1567 for(xs= 0; xs<Q2_STEP; xs++){
// Baseline: reconstruct from the current best and measure its error per cell.
1568 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1569 dequantize_all(s, p, idwt2_buffer, width, height);
1570 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1571 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
// Trial: move every coefficient of this grid phase one step towards zero.
1572 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1573 for(y=ys; y<b->height; y+= Q2_STEP){
1574 for(x=xs; x<b->width; x+= Q2_STEP){
1575 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
1576 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
1577 //FIXME try more than just --
1580 dequantize_all(s, p, idwt2_buffer, width, height);
1581 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1582 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
// Accept the trial per cell and repeat the same step on best_dequant.
1583 for(y=ys; y<b->height; y+= Q2_STEP){
1584 for(x=xs; x<b->width; x+= Q2_STEP){
1585 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
1586 if(score[score_idx] <= best_score[score_idx] + threshold){
1587 best_score[score_idx]= score[score_idx];
1588 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
1589 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
1590 //FIXME copy instead
1599 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
1602 #endif /* QUANTIZE2==1 */
// Compile-time switch: when non-zero, frame_start() calls halfpel_interpol() for the current picture.
1604 #define USE_HALFPEL_PLANE 0
// Allocates and fills three interpolated planes halfpel[1..3][p] for each plane p of frame.
// Each sample uses the 6-tap kernel (1, -5, 20, 20, -5, 1) with rounding (+16) and a right
// shift by 5, applied along the row for halfpel[1] and along the column for halfpel[2] and
// halfpel[3].
// Every plane is allocated as h + 2*EDGE_WIDTH rows of ls bytes and the stored pointer is
// advanced by EDGE_WIDTH*(1+ls) from the start of the allocation.
// NOTE(review): the av_malloc() results are used without a NULL check.
// NOTE(review): the visible formulas for halfpel[2] and halfpel[3] are identical, so src is
// presumably re-pointed between them in lines not shown - confirm.
1606 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
1609 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
// The same is_chroma shift is applied to width and height.
1613 int w= s->avctx->width >>is_chroma;
1614 int h= s->avctx->height >>is_chroma;
1615 int ls= frame->linesize[p];
1616 uint8_t *src= frame->data[p];
1618 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1619 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1620 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1627 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
1634 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1642 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
// Releases the oldest reference picture, last_picture[max_ref_frames-1], if it holds data, and
// frees the halfpel planes stored for that slot.
// The index i (its loop is not visible here) selects plane [1+i/3][i%3]; the pointer is moved
// back by EDGE_WIDTH*(1+linesize) before av_free(), matching the offset that
// halfpel_interpol() adds to the allocation.
1650 static void release_buffer(AVCodecContext *avctx){
1651 SnowContext *s = avctx->priv_data;
1654 if(s->last_picture[s->max_ref_frames-1].data[0]){
1655 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
1657 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
1658 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
// Prepares the picture buffers for one frame.
// Draws the top and bottom edges of the current picture (if it holds data), releases the
// oldest reference, shifts last_picture[] and halfpel_plane[] by one slot and stores the
// current picture as reference 0. Then it walks the references and requests a new buffer for
// current_picture.
// NOTE(review): the return statements are not visible; decode_frame() tests the result with < 0.
1662 static int frame_start(SnowContext *s){
1664 int w= s->avctx->width; //FIXME round up to x16 ?
1665 int h= s->avctx->height;
1667 if(s->current_picture.data[0]){
// Luma uses the full size and EDGE_WIDTH; both chroma planes use half of each.
1668 s->dsp.draw_edges(s->current_picture.data[0],
1669 s->current_picture.linesize[0], w , h ,
1670 EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM);
1671 s->dsp.draw_edges(s->current_picture.data[1],
1672 s->current_picture.linesize[1], w>>1, h>>1,
1673 EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
1674 s->dsp.draw_edges(s->current_picture.data[2],
1675 s->current_picture.linesize[2], w>>1, h>>1,
1676 EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
1679 release_buffer(s->avctx);
// Rotate: the frame struct from the last slot becomes the new current picture, every other
// reference moves one slot towards the end.
1681 tmp= s->last_picture[s->max_ref_frames-1];
1682 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
1683 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
1684 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
1685 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
1686 s->last_picture[0]= s->current_picture;
1687 s->current_picture= tmp;
// Walk the references that hold data. NOTE(review): the statement executed when the previous
// reference is a key frame is not visible (presumably a break) - confirm.
1693 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
1694 if(i && s->last_picture[i-1].key_frame)
// NOTE(review): the condition enclosing this check is not visible in this excerpt.
1697 if(s->ref_frames==0){
1698 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
1703 s->current_picture.reference= 1;
1704 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
1705 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
1709 s->current_picture.key_frame= s->keyframe;
// Frees what the context owns: the two DWT work buffers, motion-estimation scratch memory,
// the block array, the scratch buffer, the per-reference mv and score arrays and the x_coeff
// array of every subband. Picture buffers (references, mconly_picture, current_picture) are
// released only when they hold data.
1714 static av_cold void common_end(SnowContext *s){
1715 int plane_index, level, orientation, i;
1717 av_freep(&s->spatial_dwt_buffer);
1718 av_freep(&s->spatial_idwt_buffer);
1721 av_freep(&s->m.me.scratchpad);
1722 av_freep(&s->m.me.map);
1723 av_freep(&s->m.me.score_map);
1724 av_freep(&s->m.obmc_scratchpad);
1726 av_freep(&s->block);
1727 av_freep(&s->scratchbuf);
1729 for(i=0; i<MAX_REF_FRAMES; i++){
1730 av_freep(&s->ref_mvs[i]);
1731 av_freep(&s->ref_scores[i]);
1732 if(s->last_picture[i].data[0])
1733 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
// Same subband traversal as common_init_after_header(), which allocates x_coeff.
1736 for(plane_index=0; plane_index<3; plane_index++){
1737 for(level=s->spatial_decomposition_count-1; level>=0; level--){
1738 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1739 SubBand *b= &s->plane[plane_index].band[level][orientation];
1741 av_freep(&b->x_coeff);
1745 if (s->mconly_picture.data[0])
1746 s->avctx->release_buffer(s->avctx, &s->mconly_picture);
1747 if (s->current_picture.data[0])
1748 s->avctx->release_buffer(s->avctx, &s->current_picture);
// Decoder init callback (referenced by .init in ff_snow_decoder).
// Sets the output pixel format to PIX_FMT_YUV420P.
// NOTE(review): the remaining statements of this function are not visible in this excerpt.
1751 static av_cold int decode_init(AVCodecContext *avctx)
1753 avctx->pix_fmt= PIX_FMT_YUV420P;
// Decode callback: decodes one packet into a picture.
//   avctx      codec context; priv_data is the SnowContext
//   data       AVFrame that receives the output picture
//   data_size  set to sizeof(AVFrame)
//   avpkt      input packet; its data and size feed the range decoder
// Flow: header, geometry and slice-buffer setup, frame_start(), then per plane: unpack the
// coefficients and, for each block row, decode / correlate / dequantize the needed subband
// rows, run the buffered inverse DWT, add the prediction and release finished lines.
// NOTE(review): return statements and several declarations are missing from this excerpt.
1760 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
1761 const uint8_t *buf = avpkt->data;
1762 int buf_size = avpkt->size;
1763 SnowContext *s = avctx->priv_data;
1764 RangeCoder * const c= &s->c;
1766 AVFrame *picture = data;
1767 int level, orientation, plane_index;
1769 ff_init_range_decoder(c, buf, buf_size);
1770 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
1772 s->current_picture.pict_type= AV_PICTURE_TYPE_I; //FIXME I vs. P
1773 if(decode_header(s)<0)
1775 common_init_after_header(avctx);
1777 // realloc slice buffer for the case that spatial_decomposition_count changed
1778 ff_slice_buffer_destroy(&s->sb);
1779 ff_slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
1781 for(plane_index=0; plane_index<3; plane_index++){
1782 Plane *p= &s->plane[plane_index];
// fast_mc is enabled only for one specific filter: diag_mc set, 6 taps, coefficients starting
// with 40 and -10 (the remaining terms of the condition are not visible here).
1783 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
1784 && p->hcoeff[1]==-10
1790 if(frame_start(s) < 0)
1792 //keyframe flag duplication mess FIXME
// NOTE(review): this debug output is logged at AV_LOG_ERROR level.
1793 if(avctx->debug&FF_DEBUG_PICT_INFO)
1794 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
1798 for(plane_index=0; plane_index<3; plane_index++){
1799 Plane *p= &s->plane[plane_index];
1803 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
// Debug flag 2048: keep a copy of the prediction alone in mconly_picture, which is returned
// instead of current_picture at the end of this function.
// NOTE(review): the buffer cleared here is spatial_dwt_buffer while the prediction runs on
// spatial_idwt_buffer - confirm this is intended.
1805 if(s->avctx->debug&2048){
1806 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
1807 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
1811 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
1812 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
1818 for(level=0; level<s->spatial_decomposition_count; level++){
1819 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1820 SubBand *b= &p->band[level][orientation];
1821 unpack_coeffs(s, b, b->parent, orientation);
1827 const int mb_h= s->b_height << s->block_max_depth;
1828 const int block_size = MB_SIZE >> s->block_max_depth;
1829 const int block_w = plane_index ? block_size/2 : block_size;
1831 DWTCompose cs[MAX_DECOMPOSITIONS];
1836 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
1837 for(mb_y=0; mb_y<=mb_h; mb_y++){
1839 int slice_starty = block_w*mb_y;
1840 int slice_h = block_w*(mb_y+1);
// Unless this is a keyframe (or debug flag 512 is set) the slice window is moved up by half a block.
1841 if (!(s->keyframe || s->avctx->debug&512)){
1842 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
1843 slice_h -= (block_w >> 1);
1846 for(level=0; level<s->spatial_decomposition_count; level++){
1847 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1848 SubBand *b= &p->band[level][orientation];
1851 int our_mb_start = mb_y;
1852 int our_mb_end = (mb_y + 1);
// Row range of this subband for the current block row: block rows scaled down to the band
// resolution plus a margin, then clamped to the band height below.
1854 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
1855 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
1856 if (!(s->keyframe || s->avctx->debug&512)){
1857 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
1858 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
1860 start_y = FFMIN(b->height, start_y);
1861 end_y = FFMIN(b->height, end_y);
1863 if (start_y != end_y){
// Orientation 0 (band [0][0]) is decoded and correlated one row ahead of the range that is
// dequantized.
1864 if (orientation == 0){
1865 SubBand * correlate_band = &p->band[0][0];
1866 int correlate_end_y = FFMIN(b->height, end_y + 1);
1867 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
1868 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
1869 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
1870 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
1873 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
// Inverse DWT in steps of 4 lines up to the end of the slice window.
1878 for(; yd<slice_h; yd+=4){
1879 ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
// Lossless mode: scale the reconstructed lines up by FRAC_BITS.
1882 if(s->qlog == LOSSLESS_QLOG){
1883 for(; yq<slice_h && yq<h; yq++){
1884 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
1886 line[x] <<= FRAC_BITS;
1891 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
// Lines of the slice window, clamped to the plane height, are handed back to the slice buffer.
1893 y = FFMIN(p->height, slice_starty);
1894 end_y = FFMIN(p->height, slice_h);
1896 ff_slice_buffer_release(&s->sb, y++);
1899 ff_slice_buffer_flush(&s->sb);
1906 release_buffer(avctx);
1908 if(!(s->avctx->debug&2048))
1909 *picture= s->current_picture;
1911 *picture= s->mconly_picture;
1913 *data_size = sizeof(AVFrame);
// Number of input bytes consumed by the range decoder.
1915 bytes_read= c->bytestream - c->bytestream_start;
1916 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
// Decoder close callback (referenced by .close in ff_snow_decoder): destroys the slice buffer.
// NOTE(review): any further cleanup statements are not visible in this excerpt.
1921 static av_cold int decode_end(AVCodecContext *avctx)
1923 SnowContext *s = avctx->priv_data;
1925 ff_slice_buffer_destroy(&s->sb);
// Codec table entry for the Snow video decoder.
// Wires decode_init / decode_end / decode_frame as callbacks and advertises CODEC_CAP_DR1;
// the horizontal-band capability is left commented out.
1932 AVCodec ff_snow_decoder = {
1934 .type = AVMEDIA_TYPE_VIDEO,
1935 .id = CODEC_ID_SNOW,
1936 .priv_data_size = sizeof(SnowContext),
1937 .init = decode_init,
1938 .close = decode_end,
1939 .decode = decode_frame,
1940 .capabilities = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
1941 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
1944 #if CONFIG_SNOW_ENCODER
// Encoder init callback.
// Logs an error when strict_std_compliance is above FF_COMPLIANCE_EXPERIMENTAL, and when the
// 9/7 wavelet is combined with lossless settings (QSCALE flag with global_quality 0).
// Then seeds the context from avctx (wavelet type, mv_scale, block depth, a default 6-tap
// filter 40 / -10 / 2 for all planes), allocates motion-estimation memory, sets up rate
// control when needed and handles the pixel format; only the PIX_FMT_YUV420P case is active,
// and both chroma shifts are fixed to 1.
// NOTE(review): the allocation results and the get_buffer() return value are not checked in
// the visible lines; the error returns themselves are missing from this excerpt.
1945 static av_cold int encode_init(AVCodecContext *avctx)
1947 SnowContext *s = avctx->priv_data;
1950 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
1951 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
1952 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
1956 if(avctx->prediction_method == DWT_97
1957 && (avctx->flags & CODEC_FLAG_QSCALE)
1958 && avctx->global_quality == 0){
1959 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
1963 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
// mv_scale is 2 with the QPEL flag and 4 without; block depth 1 is used only with the 4MV flag.
1965 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
1966 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
// These values match the filter that decode_frame() tests for when it sets fast_mc.
1968 for(plane_index=0; plane_index<3; plane_index++){
1969 s->plane[plane_index].diag_mc= 1;
1970 s->plane[plane_index].htaps= 6;
1971 s->plane[plane_index].hcoeff[0]= 40;
1972 s->plane[plane_index].hcoeff[1]= -10;
1973 s->plane[plane_index].hcoeff[2]= 2;
1974 s->plane[plane_index].fast_mc= 1;
1983 s->m.flags = avctx->flags;
1984 s->m.bit_rate= avctx->bit_rate;
1987 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
1988 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1989 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1990 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
1991 h263_encode_init(&s->m); //mv_penalty
// Clamp the requested reference count to 1 .. MAX_REF_FRAMES.
1993 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
1995 if(avctx->flags&CODEC_FLAG_PASS1){
1996 if(!avctx->stats_out)
1997 avctx->stats_out = av_mallocz(256);
// Rate control is initialised for pass 2 and whenever a fixed quantiser is not requested.
1999 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2000 if(ff_rate_control_init(&s->m) < 0)
// pass1_rc is set when neither QSCALE nor PASS2 is requested.
2003 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2005 avctx->coded_frame= &s->current_picture;
2006 switch(avctx->pix_fmt){
2007 // case PIX_FMT_YUV444P:
2008 // case PIX_FMT_YUV422P:
2009 case PIX_FMT_YUV420P:
2011 // case PIX_FMT_YUV411P:
2012 // case PIX_FMT_YUV410P:
2013 s->colorspace_type= 0;
2015 /* case PIX_FMT_RGB32:
2019 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
2022 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2023 s->chroma_h_shift= 1;
2024 s->chroma_v_shift= 1;
2026 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
2027 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
2029 s->avctx->get_buffer(s->avctx, &s->input_picture);
2031 if(s->avctx->me_method == ME_ITER){
2033 int size= s->b_width * s->b_height << 2*s->block_max_depth;
2034 for(i=0; i<s->max_ref_frames; i++){
2035 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
2036 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
// Walks a w x w block of pixels row by row.
//   pix        first pixel of the block
//   line_size  distance between the starts of two rows; the pointer is advanced by
//              line_size - w after each row
//   w          block width and height
// NOTE(review): the accumulation statements are not visible in this excerpt; judging by the
// name and by its use in encode_q_branch() the result is the sum of the pixel values - confirm.
2043 //near copy & paste from dsputil, FIXME
2044 static int pix_sum(uint8_t * pix, int line_size, int w)
2049 for (i = 0; i < w; i++) {
2050 for (j = 0; j < w; j++) {
2054 pix += line_size - w;
// Walks a w x w block of pixels row by row, with the lookup pointer sq set 256 entries into
// ff_squareTbl. Parameters have the same meaning as in pix_sum().
// NOTE(review): the accumulation statements are not visible in this excerpt; judging by the
// name and the table the result is the sum of squared pixel values - confirm.
2059 //near copy & paste from dsputil, FIXME
2060 static int pix_norm1(uint8_t * pix, int line_size, int w)
2063 uint32_t *sq = ff_squareTbl + 256;
2066 for (i = 0; i < w; i++) {
2067 for (j = 0; j < w; j ++) {
2071 pix += line_size - w;
// Names for entries of the motion-vector predictor array P used by encode_q_branch().
2079 #define P_TOPRIGHT P[3]
2080 #define P_MEDIAN P[4]
// NOTE(review): FLAG_QPEL is not referenced in the visible part of this file.
2082 #define FLAG_QPEL 1 //must be 1
// Encoder: chooses how to code the block at (x, y) on quad-tree level level and writes the
// choice to the range coder s->c.
// Three candidates are costed:
//   inter  - motion search over every reference frame, best vector coded against pred_mv()
//   intra  - block described by its mean colour (l, cb, cr) coded relative to the left neighbour
//   split  - recursion into the four child blocks (only when level != block_max_depth)
// Inter and intra are first coded into the private buffers p_buffer / i_buffer using copies of
// the context state; the chosen one is copied back into the main bytestream and s->block_state.
// The recursive calls are summed as scores, so the function returns a cost value.
// NOTE(review): many declarations, returns and else-branches are missing from this excerpt.
// NOTE(review): p_buffer and i_buffer are fixed 1024-byte arrays; no bound check on the coded
// length is visible here.
2084 static int encode_q_branch(SnowContext *s, int level, int x, int y){
2085 uint8_t p_buffer[1024];
2086 uint8_t i_buffer[1024];
2087 uint8_t p_state[sizeof(s->block_state)];
2088 uint8_t i_state[sizeof(s->block_state)];
// Remember where the main bytestream stood so the winning candidate can be written there.
2090 uint8_t *pbbak= s->c.bytestream;
2091 uint8_t *pbbak_start= s->c.bytestream_start;
2092 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
2093 const int w= s->b_width << s->block_max_depth;
2094 const int h= s->b_height << s->block_max_depth;
2095 const int rem_depth= s->block_max_depth - level;
2096 const int index= (x + y*w) << rem_depth;
2097 const int block_w= 1<<(LOG2_MB_SIZE - level);
2098 int trx= (x+1)<<rem_depth;
2099 int try= (y+1)<<rem_depth;
// Neighbouring block nodes; null_block stands in for neighbours outside the picture.
2100 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2101 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2102 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2103 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2104 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2105 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// Colour predictors are taken from the left neighbour.
2106 int pl = left->color[0];
2107 int pcb= left->color[1];
2108 int pcr= left->color[2];
2112 const int stride= s->current_picture.linesize[0];
2113 const int uvstride= s->current_picture.linesize[1];
2114 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2115 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2116 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2118 int16_t last_mv[3][2];
2119 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2120 const int shift= 1+qpel;
2121 MotionEstContext *c= &s->m.me;
// Context indices for the range coder, derived from the left and top neighbours.
2122 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2123 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2124 int my_context= av_log2(2*FFABS(left->my - top->my));
2125 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2126 int ref, best_ref, ref_score, ref_mx, ref_my;
2128 assert(sizeof(s->block_state) >= 256);
// NOTE(review): presumably a shortcut taken under a condition that is not visible in this
// excerpt (an intra block with the predicted colours and zero motion) - confirm.
2130 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2134 // clip predictors / edge ?
2136 P_LEFT[0]= left->mx;
2137 P_LEFT[1]= left->my;
2140 P_TOPRIGHT[0]= tr->mx;
2141 P_TOPRIGHT[1]= tr->my;
// Candidate vectors handed to the search: the value currently stored for this block, then the
// right and bottom neighbours.
2143 last_mv[0][0]= s->block[index].mx;
2144 last_mv[0][1]= s->block[index].my;
2145 last_mv[1][0]= right->mx;
2146 last_mv[1][1]= right->my;
2147 last_mv[2][0]= bottom->mx;
2148 last_mv[2][1]= bottom->my;
2155 assert(c-> stride == stride);
2156 assert(c->uvstride == uvstride);
2158 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2159 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2160 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2161 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
// Search window relative to the block position, reaching 16-3 beyond the block grid on each side.
2163 c->xmin = - x*block_w - 16+3;
2164 c->ymin = - y*block_w - 16+3;
2165 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2166 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
// Clamp the predictors against the window limits scaled by shift.
2168 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2169 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2170 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2171 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2172 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2173 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2174 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2176 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2177 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
// The prediction is either the left vector or the median (the selecting condition is not
// visible in this excerpt).
2180 c->pred_x= P_LEFT[0];
2181 c->pred_y= P_LEFT[1];
2183 c->pred_x = P_MEDIAN[0];
2184 c->pred_y = P_MEDIAN[1];
// Inter candidate: search every available reference frame.
2189 for(ref=0; ref<s->ref_frames; ref++){
2190 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2192 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2193 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2195 assert(ref_mx >= c->xmin);
2196 assert(ref_mx <= c->xmax);
2197 assert(ref_my >= c->ymin);
2198 assert(ref_my <= c->ymax);
2200 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2201 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
// Penalty that grows with the reference index.
2202 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
// Per-reference results are stored only when the arrays exist (encode_init() allocates them
// for ME_ITER).
2203 if(s->ref_mvs[ref]){
2204 s->ref_mvs[ref][index][0]= ref_mx;
2205 s->ref_mvs[ref][index][1]= ref_my;
2206 s->ref_scores[ref][index]= ref_score;
2208 if(score > ref_score){
2215 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
// Bit position of the main coder before either candidate is coded; used as the reference for
// both rate estimates.
2218 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
// Code the inter candidate into p_buffer with a private copy of the context state.
2220 pc.bytestream_start=
2221 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2222 memcpy(p_state, s->block_state, sizeof(s->block_state));
2224 if(level!=s->block_max_depth)
2225 put_rac(&pc, &p_state[4 + s_context], 1);
2226 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2227 if(s->ref_frames > 1)
2228 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2229 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2230 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2231 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2232 p_len= pc.bytestream - pc.bytestream_start;
2233 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
// Intra candidate: l is the rounded mean of the luma block. Given that pix_norm1() returns the
// sum of squares, the iscore expression below expands sum((pix - l)^2).
2235 block_s= block_w*block_w;
2236 sum = pix_sum(current_data[0], stride, block_w);
2237 l= (sum + block_s/2)/block_s;
2238 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
// Chroma means over the half-size blocks; their distortion terms are commented out.
2240 block_s= block_w*block_w>>2;
2241 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2242 cb= (sum + block_s/2)/block_s;
2243 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2244 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2245 cr= (sum + block_s/2)/block_s;
2246 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
// Code the intra candidate into i_buffer with its own copy of the context state.
2249 ic.bytestream_start=
2250 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2251 memcpy(i_state, s->block_state, sizeof(s->block_state));
2252 if(level!=s->block_max_depth)
2253 put_rac(&ic, &i_state[4 + s_context], 1);
2254 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2255 put_symbol(&ic, &i_state[32], l-pl , 1);
2256 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2257 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2258 i_len= ic.bytestream - ic.bytestream_start;
2259 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
2261 // assert(score==256*256*256*64-1);
2262 assert(iscore < 255*255*256 + s->lambda2*10);
2263 assert(iscore >= 0);
2264 assert(l>=0 && l<=255);
2265 assert(pl>=0 && pl<=255);
// Scene-change statistic update from the two scores (the enclosing condition is not visible).
2268 int varc= iscore >> 8;
2269 int vard= score >> 8;
2270 if (vard <= 64 || vard < varc)
2271 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2273 c->scene_change_score+= s->m.qscale;
// Split candidate: write the split flag (0) to the main coder and recurse into the four children.
2276 if(level!=s->block_max_depth){
2277 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2278 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2279 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2280 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2281 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2282 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2284 if(score2 < score && score2 < iscore)
// Intra chosen: copy its bytes to the saved position, restore the stream pointers, store the
// block and adopt the intra context state.
2289 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2290 memcpy(pbbak, i_buffer, i_len);
2292 s->c.bytestream_start= pbbak_start;
2293 s->c.bytestream= pbbak + i_len;
2294 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2295 memcpy(s->block_state, i_state, sizeof(s->block_state));
// Inter chosen: same procedure with the inter buffer and state.
2298 memcpy(pbbak, p_buffer, p_len);
2300 s->c.bytestream_start= pbbak_start;
2301 s->c.bytestream= pbbak + p_len;
2302 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2303 memcpy(s->block_state, p_state, sizeof(s->block_state));
/**
 * Write the block stored in s->block at position (x, y) of quadtree level
 * `level` to the range coder s->c.
 *
 * Below the maximum depth a split flag is coded first: 1 when the four
 * child cells are identical (same_block), otherwise 0 followed by a
 * recursive call for each of the four children. A leaf is then coded either
 * as intra (colour differences against the left neighbour) or as inter
 * (optional reference index plus motion-vector differences against the
 * pred_mv() prediction).
 *
 * NOTE(review): this listing omits short source lines (some declarations,
 * braces, else/return), so not every guard is visible here.
 */
2308 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
/* w is the block-grid width at the finest level; index addresses cell
 * (x<<rem_depth, y<<rem_depth) of that grid. */
2309 const int w= s->b_width << s->block_max_depth;
2310 const int rem_depth= s->block_max_depth - level;
2311 const int index= (x + y*w) << rem_depth;
2312 int trx= (x+1)<<rem_depth;
2313 BlockNode *b= &s->block[index];
/* Neighbours: left/top fall back to null_block on the picture border,
 * tl falls back to left, tr falls back to tl. */
2314 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2315 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2316 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2317 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* Colour predictors are taken from the left neighbour. */
2318 int pl = left->color[0];
2319 int pcb= left->color[1];
2320 int pcr= left->color[2];
/* Range-coder context selectors derived from the neighbours. */
2322 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2323 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2324 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2325 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
/* NOTE(review): the condition guarding this set_blocks() call is not
 * visible in this listing — confirm against the full source. */
2328 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
/* Split flag: 1 = the four children are identical, 0 = recurse. */
2332 if(level!=s->block_max_depth){
2333 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2334 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2336 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2337 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2338 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2339 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2340 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
/* Intra leaf: type flag 1, then the three colour deltas. */
2344 if(b->type & BLOCK_INTRA){
2345 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2346 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2347 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2348 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2349 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2350 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
/* Inter leaf: type flag 0, reference index only when more than one
 * reference frame is in use, then the motion-vector deltas. */
2352 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2353 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2354 if(s->ref_frames > 1)
2355 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2356 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2357 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2358 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/**
 * Compute a colour value in [0, 255] for plane `plane_index` of block
 * (mb_x, mb_y).
 *
 * The block is temporarily marked intra with colour 0, the neighbouring
 * blocks are rendered into the OBMC scratchpad via add_yblock(), and the
 * OBMC-weighted difference between the input picture and that rendering is
 * accumulated in `ab`, with the squared weights in `aa`. The result is
 * ((ab<<LOG2_OBMC_MAX) + aa/2)/aa, clipped.
 *
 * NOTE(review): the loop header over `i`, the declarations of ab/aa/d and
 * the restore of `backup` are not visible in this listing.
 */
2362 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2364 Plane *p= &s->plane[plane_index];
2365 const int block_size = MB_SIZE >> s->block_max_depth;
/* Block width and OBMC stride are halved when plane_index is non-zero. */
2366 const int block_w = plane_index ? block_size/2 : block_size;
2367 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2368 const int obmc_stride= plane_index ? block_size : 2*block_size;
2369 const int ref_stride= s->current_picture.linesize[plane_index];
2370 uint8_t *src= s-> input_picture.data[plane_index];
2371 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2372 const int b_stride = s->b_width << s->block_max_depth;
2373 const int w= p->width;
2374 const int h= p->height;
2375 int index= mb_x + mb_y*b_stride;
2376 BlockNode *b= &s->block[index];
/* Saved copy of the block, taken before it is modified below. */
2377 BlockNode backup= *b;
/* Force intra with colour 0 in this plane and clear the scratch area. */
2381 b->type|= BLOCK_INTRA;
2382 b->color[plane_index]= 0;
2383 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
/* (mb_x2, mb_y2) is offset from (mb_x, mb_y) by (i&1)-1 and (i>>1)-1;
 * (x, y) is the pixel origin passed to add_yblock() for that neighbour. */
2386 int mb_x2= mb_x + (i &1) - 1;
2387 int mb_y2= mb_y + (i>>1) - 1;
2388 int x= block_w*mb_x2 + block_w/2;
2389 int y= block_w*mb_y2 + block_w/2;
2391 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2392 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
/* Only pixels inside the picture are visited. */
2394 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2395 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2396 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2397 int obmc_v= obmc[index];
/* When the window crosses a picture edge, add the OBMC weight of the
 * position one block_w away in the corresponding direction. */
2399 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2400 if(x<0) obmc_v += obmc[index + block_w];
2401 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2402 if(x+block_w>w) obmc_v += obmc[index - block_w];
2403 //FIXME precalculate this or simplify it somehow else
/* d is the negated scratch value plus a rounding term for >>FRAC_BITS. */
2405 d = -dst[index] + (1<<(FRAC_BITS-1));
2407 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2408 aa += obmc_v * obmc_v; //FIXME precalculate this
2414 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
/**
 * Approximate bit cost of the block at grid position (x, y).
 *
 * Intra blocks cost 3 + 2 * sum of av_log2(2*|colour - left colour|) over
 * the three colour components. Inter blocks cost
 * 2 * (1 + av_log2(2*|dmx|) + av_log2(2*|dmy|) + av_log2(2*ref)).
 * `w` is the block width in grid cells and is only used to locate the
 * top-right neighbour.
 *
 * NOTE(review): the value returned for out-of-range positions and the
 * adjustment of dmx/dmy after pred_mv() are not visible in this listing.
 */
2417 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2418 const int b_stride = s->b_width << s->block_max_depth;
2419 const int b_height = s->b_height<< s->block_max_depth;
2420 int index= x + y*b_stride;
2421 const BlockNode *b = &s->block[index];
/* Neighbours with the same border fallbacks as the block coder:
 * null_block for left/top, left for tl, tl for tr. */
2422 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2423 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2424 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2425 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2427 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2428 // int my_context= av_log2(2*FFABS(left->my - top->my));
/* Positions outside the block grid are handled separately. */
2430 if(x<0 || x>=b_stride || y>=b_height)
/* NOTE(review): the next line looks like the last row of a block-comment
 * table whose delimiters and other rows are missing from this listing. */
2437 00001XXXX 15-30 8-15
2439 //FIXME try accurate rate
2440 //FIXME intra and inter predictors if surrounding blocks are not the same type
2441 if(b->type & BLOCK_INTRA){
2442 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2443 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2444 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2446 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2449 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2450 + av_log2(2*FFABS(dmy))
2451 + av_log2(2*b->ref));
/**
 * Rate-distortion score of block (mb_x, mb_y) in plane `plane_index`.
 *
 * The block is predicted with pred_block() over a 2*block_w square, blended
 * with the neighbours' contribution already held in the OBMC scratchpad
 * using the weights in `obmc_edged`, and compared against the input picture
 * with the comparison function selected by avctx->me_cmp. The rate term is
 * a sum of get_block_bits() calls. Returns distortion + rate*penalty_factor.
 *
 * NOTE(review): several short lines (declarations of distortion/rate/x/y/i,
 * the store of `v`, loop headers, else branches) are missing from this
 * listing.
 */
2455 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2456 Plane *p= &s->plane[plane_index];
2457 const int block_size = MB_SIZE >> s->block_max_depth;
2458 const int block_w = plane_index ? block_size/2 : block_size;
2459 const int obmc_stride= plane_index ? block_size : 2*block_size;
2460 const int ref_stride= s->current_picture.linesize[plane_index];
2461 uint8_t *dst= s->current_picture.data[plane_index];
2462 uint8_t *src= s-> input_picture.data[plane_index];
2463 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2464 uint8_t *cur = s->scratchbuf;
/* NOTE(review): variable-length array on the stack, sized from linesize. */
2465 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2466 const int b_stride = s->b_width << s->block_max_depth;
2467 const int b_height = s->b_height<< s->block_max_depth;
2468 const int w= p->width;
2469 const int h= p->height;
2472 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
/* (sx, sy) is the top-left pixel of the 2*block_w window; x0..x1 and
 * y0..y1 clamp that window to the picture. */
2473 int sx= block_w*mb_x - block_w/2;
2474 int sy= block_w*mb_y - block_w/2;
2475 int x0= FFMAX(0,-sx);
2476 int y0= FFMAX(0,-sy);
2477 int x1= FFMIN(block_w*2, w-sx);
2478 int y1= FFMIN(block_w*2, h-sy);
2481 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
/* Blend the prediction with the scratchpad contribution, row by row. */
2483 for(y=y0; y<y1; y++){
2484 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2485 const IDWTELEM *pred1 = pred + y*obmc_stride;
2486 uint8_t *cur1 = cur + y*ref_stride;
2487 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2488 for(x=x0; x<x1; x++){
/* Bring the weighted prediction to FRAC_BITS precision. */
2489 #if FRAC_BITS >= LOG2_OBMC_MAX
2490 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2492 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2494 v = (v + pred1[x]) >> FRAC_BITS;
/* Clamp to 0..255: negative values become 0, values above 255 become 255
 * once stored as a byte. */
2495 if(v&(~255)) v= ~(v>>31);
2500 /* copy the regions where obmc[] = (uint8_t)256 */
2501 if(LOG2_OBMC_MAX == 8
2502 && (mb_x == 0 || mb_x == b_stride-1)
2503 && (mb_y == 0 || mb_y == b_height-1)){
2512 for(y=y0; y<y1; y++)
2513 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2517 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2518 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2519 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2520 * So improving the score of one block is not strictly guaranteed
2521 * to improve the score of the whole frame, thus iterative motion
2522 * estimation does not always converge. */
/* Distortion: wavelet comparison for W97/W53, otherwise dsp.me_cmp. */
2523 if(s->avctx->me_cmp == FF_CMP_W97)
2524 distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2525 else if(s->avctx->me_cmp == FF_CMP_W53)
2526 distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2530 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2531 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2536 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
/* Rate: estimated bits of this block and of nearby blocks. */
2545 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2547 if(mb_x == b_stride-2)
2548 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2550 return distortion + rate*penalty_factor;
/**
 * Rate-distortion score for the 2x2 group of blocks whose top-left member
 * is (mb_x, mb_y), in plane `plane_index`.
 *
 * add_yblock() renders blocks around the group into current_picture, pixels
 * outside the picture are copied from the input so they do not contribute,
 * and dsp.me_cmp accumulates the distortion per block. The rate is a sum of
 * get_block_bits() calls; when the four blocks are identical ("merged") the
 * first four dxy entries are skipped. Returns distortion + rate*penalty_factor.
 *
 * NOTE(review): loop headers over `i`, some guards and declarations are
 * missing from this listing. zero_dst is a function-local static buffer.
 */
2553 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2555 Plane *p= &s->plane[plane_index];
2556 const int block_size = MB_SIZE >> s->block_max_depth;
2557 const int block_w = plane_index ? block_size/2 : block_size;
2558 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2559 const int obmc_stride= plane_index ? block_size : 2*block_size;
2560 const int ref_stride= s->current_picture.linesize[plane_index];
2561 uint8_t *dst= s->current_picture.data[plane_index];
2562 uint8_t *src= s-> input_picture.data[plane_index];
2563 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2564 // const has only been removed from zero_dst to suppress a warning
2565 static IDWTELEM zero_dst[4096]; //FIXME
2566 const int b_stride = s->b_width << s->block_max_depth;
2567 const int w= p->width;
2568 const int h= p->height;
2571 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
/* (mb_x2, mb_y2) is offset from (mb_x, mb_y) by (i%3)-1 and (i/3)-1. */
2574 int mb_x2= mb_x + (i%3) - 1;
2575 int mb_y2= mb_y + (i/3) - 1;
2576 int x= block_w*mb_x2 + block_w/2;
2577 int y= block_w*mb_y2 + block_w/2;
2579 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2580 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2582 //FIXME find a cleaner/simpler way to skip the outside stuff
/* Rows above and below the picture are copied from the input. */
2583 for(y2= y; y2<0; y2++)
2584 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2585 for(y2= h; y2<y+block_w; y2++)
2586 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
/* Columns left and right of the picture likewise; the guards for these
 * two loops are not visible in this listing. */
2588 for(y2= y; y2<y+block_w; y2++)
2589 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2592 for(y2= y; y2<y+block_w; y2++)
2593 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
/* me_cmp[1] is used for block_w 8, me_cmp[0] for block_w 16. */
2596 assert(block_w== 8 || block_w==16);
2597 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2601 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2602 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
/* Rate: the group coded as one block of width 2, plus individual blocks
 * from dxy (entries 0..3 are skipped when merged). */
2610 rate = get_block_bits(s, mb_x, mb_y, 2);
2611 for(i=merged?4:0; i<9; i++){
2612 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2613 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2616 return distortion + rate*penalty_factor;
/**
 * Encode the coefficients of subband `b` (read from `src`) to the range
 * coder s->c using neighbour-context modelling and zero-run lengths.
 *
 * Two traversals are visible. The first collects run lengths into runs[]
 * for positions where the left, top-left, top, top-right and parent
 * coefficients are all zero. The second writes the run count and runs with
 * put_symbol2(), a significance flag with put_rac() where some neighbour is
 * non-zero, and for non-zero coefficients the magnitude minus one and sign.
 *
 * NOTE(review): the loop headers, boundary guards on the neighbour reads,
 * declarations of runs/run/px/py/p/v and the return statements are missing
 * from this listing; an error is logged when the output buffer has less
 * than w*40 bytes left.
 */
2619 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2620 const int w= b->width;
2621 const int h= b->height;
/* First traversal: gather neighbours of src[x + y*stride]. */
2633 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2634 v= src[x + y*stride];
2637 t= src[x + (y-1)*stride];
2639 lt= src[x - 1 + (y-1)*stride];
2642 rt= src[x + 1 + (y-1)*stride];
2646 l= src[x - 1 + y*stride];
2648 if(orientation==1) ll= src[y + (x-2)*stride];
2649 else ll= src[x - 2 + y*stride];
/* Parent coefficient, read only when (px, py) lies inside the parent band. */
2655 if(px<b->parent->width && py<b->parent->height)
2656 p= parent[px + py*2*stride];
/* All neighbours zero: this position is covered by run-length coding. */
2658 if(!(/*ll|*/l|lt|t|rt|p)){
2660 runs[run_index++]= run;
2668 max_index= run_index;
2669 runs[run_index++]= run;
2671 run= runs[run_index++];
/* Number of runs, then the first run if there is one. */
2673 put_symbol2(&s->c, b->state[30], max_index, 0);
2674 if(run_index <= max_index)
2675 put_symbol2(&s->c, b->state[1], run, 3);
/* Output-space check. */
2678 if(s->c.bytestream_end - s->c.bytestream < w*40){
2679 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
/* Second traversal: same neighbour gathering as above. */
2684 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2685 v= src[x + y*stride];
2688 t= src[x + (y-1)*stride];
2690 lt= src[x - 1 + (y-1)*stride];
2693 rt= src[x + 1 + (y-1)*stride];
2697 l= src[x - 1 + y*stride];
2699 if(orientation==1) ll= src[y + (x-2)*stride];
2700 else ll= src[x - 2 + y*stride];
2706 if(px<b->parent->width && py<b->parent->height)
2707 p= parent[px + py*2*stride];
/* Some neighbour non-zero: code a significance flag in a context chosen
 * from the weighted neighbour magnitudes. */
2709 if(/*ll|*/l|lt|t|rt|p){
2710 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
2712 put_rac(&s->c, &b->state[0][context], !!v);
/* Otherwise the position is consumed from the current run. */
2715 run= runs[run_index++];
2717 if(run_index <= max_index)
2718 put_symbol2(&s->c, b->state[1], run, 3);
/* Non-zero coefficient: magnitude-1, then sign with a context built from
 * the quantised left and top neighbours. */
2726 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
2727 int l2= 2*FFABS(l) + (l<0);
2728 int t2= 2*FFABS(t) + (t<0);
2730 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
2731 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/**
 * Encode one subband. Forwards all arguments to encode_subband_c0run() and
 * returns its result; the commented-out calls name alternative coders that
 * are not used here.
 */
2739 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2740 // encode_subband_qtree(s, b, src, parent, stride, orientation);
2741 // encode_subband_z0run(s, b, src, parent, stride, orientation);
2742 return encode_subband_c0run(s, b, src, parent, stride, orientation);
2743 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/**
 * Evaluate candidate parameters `p` for block (mb_x, mb_y) with
 * get_block_rd() on plane 0.
 *
 * On the intra path p[0..2] are written to the block colours and the block
 * is marked intra. On the other path p[0] and p[1] are hashed together with
 * the block's ref into s->me_cache (a match means the candidate was already
 * tried in this cache generation), and the intra flag is cleared.
 *
 * NOTE(review): the branch on `intra`, the assignment of the motion vector,
 * the comparison against *best_rd, the restore from `backup` and the return
 * values are missing from this listing.
 */
2746 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2747 const int b_stride= s->b_width << s->block_max_depth;
2748 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
/* Copy of the block taken before any modification. */
2749 BlockNode backup= *block;
2750 int rd, index, value;
2752 assert(mb_x>=0 && mb_y>=0);
2753 assert(mb_x<b_stride);
/* Intra candidate: p holds the three colour components. */
2756 block->color[0] = p[0];
2757 block->color[1] = p[1];
2758 block->color[2] = p[2];
2759 block->type |= BLOCK_INTRA;
/* Non-intra candidate: look up / record (p[0], p[1], ref) in the cache. */
2761 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2762 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2763 if(s->me_cache[index] == value)
2765 s->me_cache[index]= value;
2769 block->type &= ~BLOCK_INTRA;
2772 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2784 /* special case for int[2] args we discard afterwards,
2785 * fixes compilation problem with gcc 2.95 */
/**
 * Non-intra convenience wrapper around check_block(): packs p0 and p1 into
 * a local two-element array and calls check_block() with intra = 0,
 * returning its result.
 * NOTE(review): check_block() declares its parameter as int p[3] while the
 * array passed here has two elements; the visible non-intra code in
 * check_block() reads only p[0] and p[1].
 */
2786 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2787 int p[2] = {p0, p1};
2788 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/**
 * Evaluate one non-intra candidate (p0, p1, ref) applied to the whole 2x2
 * block group whose top-left member is (mb_x, mb_y), scored with
 * get_4block_rd() on plane 0.
 *
 * The four blocks are saved, the candidate is checked against s->me_cache,
 * the top-left block is made non-intra and copied to its three siblings,
 * and the saved blocks are written back afterwards.
 *
 * NOTE(review): the assignment of mx/my/ref, the comparison against
 * *best_rd (which presumably decides whether the restore runs) and the
 * return values are missing from this listing.
 */
2791 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2792 const int b_stride= s->b_width << s->block_max_depth;
2793 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2794 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2795 int rd, index, value;
/* The group must start on even coordinates. */
2797 assert(mb_x>=0 && mb_y>=0);
2798 assert(mb_x<b_stride);
2799 assert(((mb_x|mb_y)&1) == 0);
/* Cache lookup keyed on (p0, p1) and the block's current ref. */
2801 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
2802 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
2803 if(s->me_cache[index] == value)
2805 s->me_cache[index]= value;
/* Apply the candidate to all four blocks of the group. */
2810 block->type &= ~BLOCK_INTRA;
2811 block[1]= block[b_stride]= block[b_stride+1]= *block;
2813 rd= get_4block_rd(s, mb_x, mb_y, 0);
/* Write the saved blocks back. */
2820 block[0]= backup[0];
2821 block[1]= backup[1];
2822 block[b_stride]= backup[2];
2823 block[b_stride+1]= backup[3];
/**
 * Iterative motion estimation over the whole block grid.
 *
 * 1. encode_q_branch() is run on every macroblock to obtain an initial
 *    block assignment; s->block_state is saved before and restored after.
 * 2. Up to 25 passes visit every block. For each block the OBMC weights are
 *    adjusted for picture edges (obmc_edged), a candidate intra colour is
 *    computed with get_dc(), and a series of motion-vector candidates
 *    (stored vectors, zero, neighbours, diamond and square refinements) is
 *    tried through check_block_inter() per reference frame, followed by the
 *    intra candidate. When a block changes, BLOCK_OPT is cleared on its
 *    eight neighbours so they are revisited.
 * 3. When block_max_depth is 1, each 2x2 group that is not already uniform
 *    is tested with check_4block_inter() for merging.
 *
 * NOTE(review): many short lines (loop headers, continue/break, change
 * counters, restoring s->c from `r`, neighbour-existence guards) are
 * missing from this listing. The per-pass statistics are logged at
 * AV_LOG_ERROR level.
 */
2828 static void iterative_me(SnowContext *s){
2829 int pass, mb_x, mb_y;
2830 const int b_width = s->b_width << s->block_max_depth;
2831 const int b_height= s->b_height << s->block_max_depth;
2832 const int b_stride= b_width;
/* Initial assignment via encode_q_branch(); coder and state are snapshotted
 * first. */
2836 RangeCoder r = s->c;
2837 uint8_t state[sizeof(s->block_state)];
2838 memcpy(state, s->block_state, sizeof(s->block_state));
2839 for(mb_y= 0; mb_y<s->b_height; mb_y++)
2840 for(mb_x= 0; mb_x<s->b_width; mb_x++)
2841 encode_q_branch(s, 0, mb_x, mb_y);
2843 memcpy(s->block_state, state, sizeof(s->block_state));
2846 for(pass=0; pass<25; pass++){
2849 for(mb_y= 0; mb_y<b_height; mb_y++){
2850 for(mb_x= 0; mb_x<b_width; mb_x++){
2851 int dia_change, i, j, ref;
2852 int best_rd= INT_MAX, ref_rd;
2853 BlockNode backup, ref_b;
2854 const int index= mb_x + mb_y * b_stride;
2855 BlockNode *block= &s->block[index];
/* Pointers to the eight neighbours, NULL where outside the grid. */
2856 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
2857 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
2858 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
2859 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
2860 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
2861 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
2862 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
2863 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
2864 const int b_w= (MB_SIZE >> s->block_max_depth);
2865 uint8_t obmc_edged[b_w*2][b_w*2];
/* After the first pass, blocks still flagged BLOCK_OPT are tested here;
 * the flag is (re)set for the block being processed. */
2867 if(pass && (block->type & BLOCK_OPT))
2869 block->type |= BLOCK_OPT;
/* New cache generation per block; the cache is cleared when the
 * generation counter is zero. */
2873 if(!s->me_cache_generation)
2874 memset(s->me_cache, 0, sizeof(s->me_cache));
2875 s->me_cache_generation += 1<<22;
2877 //FIXME precalculate
/* Start from the plain OBMC window, then fold weights inwards on the
 * picture edges this block touches (the guards for the left and top cases
 * are not visible in this listing). */
2880 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
2882 for(y=0; y<b_w*2; y++)
2883 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
2884 if(mb_x==b_stride-1)
2885 for(y=0; y<b_w*2; y++)
2886 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
2888 for(x=0; x<b_w*2; x++)
2889 obmc_edged[0][x] += obmc_edged[b_w-1][x];
2890 for(y=1; y<b_w; y++)
2891 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
2893 if(mb_y==b_height-1){
2894 for(x=0; x<b_w*2; x++)
2895 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
2896 for(y=b_w; y<b_w*2-1; y++)
2897 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
2901 //skip stuff outside the picture
/* For border blocks, copy input pixels into current_picture for the part
 * of the 2*block_w window that lies outside the picture. */
2902 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
2903 uint8_t *src= s-> input_picture.data[0];
2904 uint8_t *dst= s->current_picture.data[0];
2905 const int stride= s->current_picture.linesize[0];
2906 const int block_w= MB_SIZE >> s->block_max_depth;
2907 const int sx= block_w*mb_x - block_w/2;
2908 const int sy= block_w*mb_y - block_w/2;
2909 const int w= s->plane[0].width;
2910 const int h= s->plane[0].height;
2914 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
2915 for(y=h; y<sy+block_w*2; y++)
2916 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
2918 for(y=sy; y<sy+block_w*2; y++)
2919 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
2921 if(sx+block_w*2 > w){
2922 for(y=sy; y<sy+block_w*2; y++)
2923 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
2927 // intra(black) = neighbors' contribution to the current block
2929 color[i]= get_dc(s, mb_x, mb_y, i);
2931 // get previous score (cannot be cached due to OBMC)
2932 if(pass > 0 && (block->type&BLOCK_INTRA)){
2933 int color0[3]= {block->color[0], block->color[1], block->color[2]};
2934 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
2936 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
/* Per-reference search. References whose stored score exceeds 3/2 of the
 * score for ref_b.ref are tested by the condition below. */
2940 for(ref=0; ref < s->ref_frames; ref++){
2941 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
2942 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
/* Candidates: stored vector, zero vector, then the vectors stored for the
 * top/left/right/bottom positions (their existence guards are not visible
 * in this listing). */
2947 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
2948 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
2950 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
2952 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
2954 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
2956 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
2959 //FIXME avoid subpel interpolation / round to nearest integer
/* Diamond refinement in steps of 4 units, size from avctx->dia_size (>= 1). */
2962 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
2964 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
2965 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
2966 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
2967 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
/* Square refinement over the eight unit offsets. */
2973 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
2976 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
2978 //FIXME or try the standard 2 pass qpel or similar
/* Remember the vector found for this reference. */
2980 mvr[0][0]= block->mx;
2981 mvr[0][1]= block->my;
2982 if(ref_rd > best_rd){
/* Finally try the intra candidate computed by get_dc(). */
2989 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
2990 //FIXME RD style color selection
/* The block changed: neighbours must be re-optimised. */
2991 if(!same_block(block, &backup)){
2992 if(tb ) tb ->type &= ~BLOCK_OPT;
2993 if(lb ) lb ->type &= ~BLOCK_OPT;
2994 if(rb ) rb ->type &= ~BLOCK_OPT;
2995 if(bb ) bb ->type &= ~BLOCK_OPT;
2996 if(tlb) tlb->type &= ~BLOCK_OPT;
2997 if(trb) trb->type &= ~BLOCK_OPT;
2998 if(blb) blb->type &= ~BLOCK_OPT;
2999 if(brb) brb->type &= ~BLOCK_OPT;
3004 av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
/* 2x2 merge pass, only with one level of block subdivision. */
3009 if(s->block_max_depth == 1){
3011 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3012 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3014 int best_rd, init_rd;
3015 const int index= mb_x + mb_y * b_stride;
3018 b[0]= &s->block[index];
3020 b[2]= b[0]+b_stride;
/* Groups whose four blocks are already identical are tested here. */
3022 if(same_block(b[0], b[1]) &&
3023 same_block(b[0], b[2]) &&
3024 same_block(b[0], b[3]))
3027 if(!s->me_cache_generation)
3028 memset(s->me_cache, 0, sizeof(s->me_cache));
3029 s->me_cache_generation += 1<<22;
3031 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3033 //FIXME more multiref search?
/* Candidate 1: rounded average of the four vectors with ref 0. */
3034 check_4block_inter(s, mb_x, mb_y,
3035 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3036 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
/* Further candidates: each non-intra member's own vector and ref. */
3039 if(!(b[i]->type&BLOCK_INTRA))
3040 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3042 if(init_rd != best_rd)
3046 av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
/**
 * Encode the block tree of the current frame.
 *
 * For every macroblock either encode_q_branch2() (when me_method is ME_ITER
 * or `search` is 0) or encode_q_branch() is called. An error is logged when
 * fewer than w*MB_SIZE*MB_SIZE*3 bytes remain in the output buffer.
 *
 * NOTE(review): the loop headers, the declarations of x/y/w and the
 * statement executed for the first condition below (ME_ITER on a
 * non-keyframe with search enabled) are missing from this listing.
 */
3050 static void encode_blocks(SnowContext *s, int search){
3055 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
3059 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
3060 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
3064 if(s->avctx->me_method == ME_ITER || !search)
3065 encode_q_branch2(s, 0, x, y);
3067 encode_q_branch (s, 0, x, y);
/**
 * Quantize subband `b` from `src` into `dst` (both indexed with `stride`).
 *
 * With s->qlog == LOSSLESS_QLOG the coefficients are copied unchanged.
 * Otherwise qmul is derived from the clipped s->qlog + b->qlog, and two
 * variants are visible: one dividing by qmul directly and one adding `bias`
 * before dividing. Coefficients that fail the
 * (unsigned)(i+thres1) > thres2 test are written as 0; the others are
 * divided and stored with their sign.
 *
 * NOTE(review): the loop headers, the sign tests, the scaling of `i` before
 * the division and the computation of thres2 are missing from this listing.
 */
3072 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3073 const int w= b->width;
3074 const int h= b->height;
3075 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3076 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3077 int x,y, thres1, thres2;
/* Lossless: plain copy. */
3079 if(s->qlog == LOSSLESS_QLOG){
3082 dst[x + y*stride]= src[x + y*stride];
/* A non-zero `bias` argument selects no bias; zero selects 3/8 of qmul. */
3086 bias= bias ? 0 : (3*qmul)>>3;
3087 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
/* Variant without bias in the division. */
3093 int i= src[x + y*stride];
3095 if((unsigned)(i+thres1) > thres2){
3098 i/= qmul; //FIXME optimize
3099 dst[x + y*stride]= i;
3103 i/= qmul; //FIXME optimize
3104 dst[x + y*stride]= -i;
3107 dst[x + y*stride]= 0;
/* Variant with bias added before the division. */
3113 int i= src[x + y*stride];
3115 if((unsigned)(i+thres1) > thres2){
3118 i= (i + bias) / qmul; //FIXME optimize
3119 dst[x + y*stride]= i;
3123 i= (i + bias) / qmul; //FIXME optimize
3124 dst[x + y*stride]= -i;
3127 dst[x + y*stride]= 0;
/**
 * Dequantize subband `b` in place in `src`.
 *
 * Does nothing when s->qlog == LOSSLESS_QLOG. Otherwise each coefficient is
 * multiplied by qmul, offset by qadd (derived from s->qbias) and shifted
 * right by QEXPSHIFT; the negative form is computed on the negated value so
 * that rounding is symmetric.
 *
 * NOTE(review): the loop headers and the sign tests selecting between the
 * two assignments are missing from this listing.
 */
3133 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3134 const int w= b->width;
3135 const int h= b->height;
3136 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3137 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3138 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3141 if(s->qlog == LOSSLESS_QLOG) return;
3145 int i= src[x + y*stride];
3147 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3149 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
/**
 * Replace each coefficient of subband `b` in `src` by its difference from a
 * prediction built from already-available neighbours (left, top, top-right
 * or top-left).
 *
 * The subband is traversed from the last row/column to the first, so every
 * prediction reads neighbours that have not been modified yet. Three
 * predictors are visible: a median of left/top/top-right, a median of
 * left/top/(left+top-topleft), and plain left or top.
 *
 * NOTE(review): the conditions on `x` and `use_median` that select between
 * these predictors are missing from this listing; `inverse` is not used in
 * the visible lines.
 */
3155 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3156 const int w= b->width;
3157 const int h= b->height;
3160 for(y=h-1; y>=0; y--){
3161 for(x=w-1; x>=0; x--){
3162 int i= x + y*stride;
3166 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3167 else src[i] -= src[i - 1];
3169 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3170 else src[i] -= src[i - 1];
3173 if(y) src[i] -= src[i - stride];
/**
 * Counterpart of decorrelate(): add the neighbour-based prediction back to
 * each coefficient of subband `b` in `src`, using the same three predictor
 * forms (median of left/top/top-right, median of
 * left/top/(left+top-topleft), plain left or top).
 *
 * NOTE(review): the loop headers and the conditions on `x` and `use_median`
 * that select the predictor are missing from this listing; `inverse` is not
 * used in the visible lines.
 */
3179 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3180 const int w= b->width;
3181 const int h= b->height;
3186 int i= x + y*stride;
3190 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3191 else src[i] += src[i - 1];
3193 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3194 else src[i] += src[i - 1];
3197 if(y) src[i] += src[i - stride];
/**
 * Write the per-subband qlog values to the range coder s->c.
 *
 * Iterates over plane indices 0 and 1 and over every decomposition level.
 * Orientation 0 is only written at level 0, and orientation 2 is always
 * skipped. Each value is coded as a signed symbol with s->header_state.
 */
3203 static void encode_qlogs(SnowContext *s){
3204 int plane_index, level, orientation;
3206 for(plane_index=0; plane_index<2; plane_index++){
3207 for(level=0; level<s->spatial_decomposition_count; level++){
3208 for(orientation=level ? 1:0; orientation<4; orientation++){
3209 if(orientation==2) continue;
3210 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
/**
 * Write the frame header to the range coder s->c.
 *
 * The keyframe flag is coded first with a fresh MID_STATE context. On a
 * keyframe or when always_reset is set, the "last_*" tracking fields are
 * reset. The visible code then writes the global stream parameters
 * (version, decomposition settings, colourspace, chroma shifts, scalability
 * flag, max_ref_frames-1), a flag telling whether the motion-compensation
 * filter parameters changed followed by those parameters, a flag and value
 * for a changed spatial_decomposition_count, and finally the deltas of
 * spatial_decomposition_type, qlog, mv_scale, qbias and block_max_depth
 * against their last written values.
 *
 * NOTE(review): several guards (keyframe-only section, update_mc test,
 * else branches), local declarations and some reset assignments are
 * missing from this listing.
 */
3216 static void encode_header(SnowContext *s){
3220 memset(kstate, MID_STATE, sizeof(kstate));
3222 put_rac(&s->c, kstate, s->keyframe);
/* Reset of the delta-coding history. */
3223 if(s->keyframe || s->always_reset){
3225 s->last_spatial_decomposition_type=
3229 s->last_block_max_depth= 0;
3230 for(plane_index=0; plane_index<2; plane_index++){
3231 Plane *p= &s->plane[plane_index];
3234 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
/* Global stream parameters. */
3238 put_symbol(&s->c, s->header_state, s->version, 0);
3239 put_rac(&s->c, s->header_state, s->always_reset);
3240 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3241 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3242 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3243 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3244 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3245 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3246 put_rac(&s->c, s->header_state, s->spatial_scalability);
3247 // put_rac(&s->c, s->header_state, s->rate_scalability);
3248 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
/* update_mc becomes non-zero when any filter parameter of plane 0 or 1
 * differs from the last written one. */
3255 for(plane_index=0; plane_index<2; plane_index++){
3256 Plane *p= &s->plane[plane_index];
3257 update_mc |= p->last_htaps != p->htaps;
3258 update_mc |= p->last_diag_mc != p->diag_mc;
3259 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3261 put_rac(&s->c, s->header_state, update_mc);
/* Filter parameters: diag_mc flag, htaps/2-1, then |hcoeff[i]| for i from
 * htaps/2 down to 1. */
3263 for(plane_index=0; plane_index<2; plane_index++){
3264 Plane *p= &s->plane[plane_index];
3265 put_rac(&s->c, s->header_state, p->diag_mc);
3266 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3267 for(i= p->htaps/2; i; i--)
3268 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
/* Changed decomposition count: flag 1 plus the new count, else flag 0. */
3271 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3272 put_rac(&s->c, s->header_state, 1);
3273 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3276 put_rac(&s->c, s->header_state, 0);
/* Signed deltas against the previously written values. */
3279 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3280 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3281 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3282 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3283 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
/**
 * Copy the current header parameters into their "last_*" counterparts:
 * per-plane diag_mc, htaps and hcoeff for planes 0 and 1, and the
 * context-level spatial_decomposition_type, qlog, qbias, mv_scale,
 * block_max_depth and spatial_decomposition_count.
 *
 * NOTE(review): any guard around the per-plane loop is not visible in this
 * listing.
 */
3287 static void update_last_header_values(SnowContext *s){
3291 for(plane_index=0; plane_index<2; plane_index++){
3292 Plane *p= &s->plane[plane_index];
3293 p->last_diag_mc= p->diag_mc;
3294 p->last_htaps = p->htaps;
3295 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3299 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
3300 s->last_qlog = s->qlog;
3301 s->last_qbias = s->qbias;
3302 s->last_mv_scale = s->mv_scale;
3303 s->last_block_max_depth = s->block_max_depth;
3304 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
/**
 * Convert a qscale value to a qlog value:
 * rint(QROOT * log2(qscale / FF_QP2LAMBDA)) + 61*QROOT/8.
 * The division is done in float; the result is truncated to int on return.
 */
3307 static int qscale2qlog(int qscale){
3308 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3309 + 61*QROOT/8; //<64 >60
/**
 * Single-pass rate control: estimate frame complexity from plane 0 and ask
 * the rate controller for a quality, then update s->lambda and s->qlog.
 *
 * For every subband of plane 0 the coefficients are copied from b->buf to
 * b->ibuf, decorrelate() is applied, and |coefficient| * qdiv >> 16 is
 * summed into coef_sum. The sum is squared and shifted right by 16 and
 * stored as mb_var_sum for I pictures or mc_mb_var_sum otherwise, with the
 * other field set to 0. ff_rate_estimate_qscale() then provides
 * pict->quality.
 *
 * NOTE(review): the loop headers, the guard around decorrelate(), the
 * action taken when quality is negative and the return value are missing
 * from this listing.
 */
3312 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3314 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3315 * FIXME we know exact mv bits at this point,
3316 * but ratecontrol isn't set up to include them. */
3317 uint32_t coef_sum= 0;
3318 int level, orientation, delta_qlog;
3320 for(level=0; level<s->spatial_decomposition_count; level++){
3321 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3322 SubBand *b= &s->plane[0].band[level][orientation];
3323 IDWTELEM *buf= b->ibuf;
3324 const int w= b->width;
3325 const int h= b->height;
3326 const int stride= b->stride;
/* Weighting uses a fixed base of 2*QROOT plus the band's qlog, not s->qlog. */
3327 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3328 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3329 const int qdiv= (1<<16)/qmul;
3331 //FIXME this is ugly
3334 buf[x+y*stride]= b->buf[x+y*stride];
3336 decorrelate(s, b, buf, stride, 1, 0);
3339 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3343 /* ugly, ratecontrol just takes a sqrt again */
3344 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3345 assert(coef_sum < INT_MAX);
3347 if(pict->pict_type == AV_PICTURE_TYPE_I){
3348 s->m.current_picture.mb_var_sum= coef_sum;
3349 s->m.current_picture.mc_mb_var_sum= 0;
3351 s->m.current_picture.mc_mb_var_sum= coef_sum;
3352 s->m.current_picture.mb_var_sum= 0;
3355 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3356 if (pict->quality < 0)
/* lambda is 3/2 of the quality; qlog is moved to the value matching it. */
3358 s->lambda= pict->quality * 3/2;
3359 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3360 s->qlog+= delta_qlog;
/**
 * Derive b->qlog for every subband of plane `p` from the response of the
 * inverse wavelet transform to a single coefficient.
 *
 * For each subband the IDWT buffer is cleared, the coefficient at the
 * centre of the band is set to 256*16, ff_spatial_idwt() is run, and an
 * `error` value is accumulated from the reconstructed samples (scaled by
 * 16). b->qlog is then set to
 * log(352256/sqrt(error)) / log(2^(1/QROOT)), rounded by adding 0.5.
 *
 * NOTE(review): the declaration of `error` and the statement that
 * accumulates it from `d` are missing from this listing.
 */
3364 static void calculate_visual_weight(SnowContext *s, Plane *p){
3365 int width = p->width;
3366 int height= p->height;
3367 int level, orientation, x, y;
3369 for(level=0; level<s->spatial_decomposition_count; level++){
3370 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3371 SubBand *b= &p->band[level][orientation];
3372 IDWTELEM *ibuf= b->ibuf;
/* Single test coefficient in an otherwise zero buffer. */
3375 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3376 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3377 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3378 for(y=0; y<height; y++){
3379 for(x=0; x<width; x++){
3380 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3385 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
/**
 * Encode one video frame with the Snow encoder.
 *
 * Visible steps, in order: initialise the range coder on the output buffer,
 * copy the input picture rows, choose picture type / quality / qlog, fill in
 * the s->m context for P-frames, code the block data (encode_blocks), then
 * for each of the three planes run prediction, the forward DWT, quantisation
 * and subband coding, followed by dequantisation and the inverse DWT.
 * Afterwards the bit statistics are stored in s->m and avctx.
 *
 * NOTE(review): many control-flow lines (else branches, loop headers, early
 * returns, closing braces) are not visible in this excerpt; comments below
 * describe only what the visible statements do.
 *
 * @param avctx    codec context; avctx->priv_data is used as the SnowContext
 * @param buf      output buffer handed to ff_init_range_encoder()
 * @param buf_size size of buf as passed to ff_init_range_encoder()
 * @param data     input picture, used as an AVFrame
 * @return the result of ff_rac_terminate() on the visible return path
 */
3390 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3391 SnowContext *s = avctx->priv_data;
3392 RangeCoder * const c= &s->c;
3393 AVFrame *pict = data;
3394 const int width= s->avctx->width;
3395 const int height= s->avctx->height;
3396 int level, orientation, plane_index, i, y;
/* Scratch copies of the coder state; filled further down and restored after
 * ratecontrol_1pass() resets the range encoder. */
3397 uint8_t rc_header_bak[sizeof(s->header_state)];
3398 uint8_t rc_block_bak[sizeof(s->block_state)];
/* Point the range coder at the caller's output buffer. */
3400 ff_init_range_encoder(c, buf, buf_size);
3401 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
/* Row-by-row copy of the caller's picture into s->input_picture.
 * NOTE(review): the loop over plane index i and the definition of `shift`
 * are on lines not visible here. */
3405 for(y=0; y<(height>>shift); y++)
3406 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3407 &pict->data[i][y * pict->linesize[i]],
3410 s->new_picture = *pict;
3412 s->m.picture_number= avctx->frame_number;
/* Second pass: picture type comes from the stored rate-control entry for
 * this frame number; quality comes from the rate estimator unless
 * CODEC_FLAG_QSCALE is set. */
3413 if(avctx->flags&CODEC_FLAG_PASS2){
3415 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3416 s->keyframe= pict->pict_type==AV_PICTURE_TYPE_I;
3417 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
3418 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
/* NOTE(review): the statement guarded by this check is not visible;
 * presumably an error return -- confirm. */
3419 if (pict->quality < 0)
3423 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3425 pict->pict_type= s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
/* With one-pass rate control the very first frame gets a fixed quality. */
3428 if(s->pass1_rc && avctx->frame_number == 0)
3429 pict->quality= 2*FF_QP2LAMBDA;
3431 s->qlog= qscale2qlog(pict->quality);
3432 s->lambda = pict->quality * 3/2;
/* A negative qlog, or quality 0 together with CODEC_FLAG_QSCALE, selects
 * LOSSLESS_QLOG. */
3434 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
3435 s->qlog= LOSSLESS_QLOG;
3437 }//else keep previous frame's qlog until after motion estimation
3441 s->m.current_picture_ptr= &s->m.current_picture;
3442 s->m.last_picture.f.pts = s->m.current_picture.f.pts;
3443 s->m.current_picture.f.pts = pict->pts;
/* P-frames: populate the s->m context (picture pointers, strides, macroblock
 * geometry, lambda/qscale, dsp) from the Snow context and avctx. */
3444 if(pict->pict_type == AV_PICTURE_TYPE_P){
/* Dimensions in 16x16 units, rounded up. */
3445 int block_width = (width +15)>>4;
3446 int block_height= (height+15)>>4;
3447 int stride= s->current_picture.linesize[0];
3449 assert(s->current_picture.data[0]);
3450 assert(s->last_picture[0].data[0]);
3452 s->m.avctx= s->avctx;
3453 s->m.current_picture.f.data[0] = s->current_picture.data[0];
3454 s->m. last_picture.f.data[0] = s->last_picture[0].data[0];
3455 s->m. new_picture.f.data[0] = s-> input_picture.data[0];
3456 s->m. last_picture_ptr= &s->m. last_picture;
3458 s->m. last_picture.f.linesize[0] =
3459 s->m. new_picture.f.linesize[0] =
3460 s->m.current_picture.f.linesize[0] = stride;
3461 s->m.uvlinesize= s->current_picture.linesize[1];
3463 s->m.height= height;
3464 s->m.mb_width = block_width;
3465 s->m.mb_height= block_height;
3466 s->m.mb_stride= s->m.mb_width+1;
3467 s->m.b8_stride= 2*s->m.mb_width+1;
3469 s->m.pict_type= pict->pict_type;
3470 s->m.me_method= s->avctx->me_method;
3471 s->m.me.scene_change_score=0;
3472 s->m.flags= s->avctx->flags;
3473 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3474 s->m.out_format= FMT_H263;
3475 s->m.unrestricted_mv= 1;
3477 s->m.lambda = s->lambda;
3478 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3479 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3481 s->m.dsp= s->dsp; //move
/* Save the header/block coder state so it can be restored after the
 * one-pass rate-control reset further down. */
3487 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
3488 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
/* Both visible assignments set the decomposition count to 5, so the picture
 * type makes no difference to the value here. */
3493 if(pict->pict_type == AV_PICTURE_TYPE_I)
3494 s->spatial_decomposition_count= 5;
3496 s->spatial_decomposition_count= 5;
3498 s->m.pict_type = pict->pict_type;
/* Quantiser bias: 2 for P-frames, 0 otherwise. */
3499 s->qbias= pict->pict_type == AV_PICTURE_TYPE_P ? 2 : 0;
3501 common_init_after_header(avctx);
/* Recompute the per-band visual weights only when the decomposition count
 * differs from the previous one. */
3503 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3504 for(plane_index=0; plane_index<3; plane_index++){
3505 calculate_visual_weight(s, &s->plane[plane_index]);
/* Bit accounting: bits written before encode_blocks() count as misc_bits,
 * bits added by encode_blocks() count as mv_bits. */
3510 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3511 encode_blocks(s, 1);
3512 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
/* Per-plane coding loop. NOTE(review): the definitions of w, h and x used
 * below are on lines not visible in this excerpt. */
3514 for(plane_index=0; plane_index<3; plane_index++){
3515 Plane *p= &s->plane[plane_index];
3519 // int bits= put_bits_count(&s->c.pb);
3521 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
3523 if(pict->data[plane_index]) //FIXME gray hack
/* Load the input samples, scaled up by FRAC_BITS. */
3526 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3529 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
/* Scene-change handling for P-frames outside pass 2: when the motion
 * estimator's score exceeds the configured threshold the range encoder is
 * re-initialised and the picture is switched to an I-frame.
 * NOTE(review): the start of this condition and the jump that presumably
 * restarts the frame are not visible -- confirm. */
3532 && pict->pict_type == AV_PICTURE_TYPE_P
3533 && !(avctx->flags&CODEC_FLAG_PASS2)
3534 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3535 ff_init_range_encoder(c, buf, buf_size);
3536 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3537 pict->pict_type= AV_PICTURE_TYPE_I;
3539 s->current_picture.key_frame=1;
/* Lossless: shift back down by FRAC_BITS after adding (1<<(FRAC_BITS-1))-1.
 * Otherwise the samples are shifted up by ENCODER_EXTRA_BITS. */
3543 if(s->qlog == LOSSLESS_QLOG){
3546 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3552 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
/* NOTE(review): lines around the next two calls are missing from this
 * excerpt, so it cannot be told from here whether both are live code or how
 * they are selected -- check the full file. */
3558 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
3560 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
/* One-pass rate control runs once, on plane 0. After it, the range encoder
 * is re-initialised, the saved header/block states are restored and the
 * block data is encoded again. */
3562 if(s->pass1_rc && plane_index==0){
3563 int delta_qlog = ratecontrol_1pass(s, pict);
/* NOTE(review): the statement guarded by this check is not visible;
 * presumably an error return -- confirm. */
3564 if (delta_qlog <= INT_MIN)
3567 //reordering qlog in the bitstream would eliminate this reset
3568 ff_init_range_encoder(c, buf, buf_size);
3569 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
3570 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
3572 encode_blocks(s, 0);
/* Quantise and entropy-code each subband. NOTE(review): any conditions
 * guarding decorrelate()/correlate() are on lines not visible here. */
3576 for(level=0; level<s->spatial_decomposition_count; level++){
3577 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3578 SubBand *b= &p->band[level][orientation];
3581 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
3583 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == AV_PICTURE_TYPE_P, 0);
/* The parent band's coefficients are passed along when a parent exists. */
3584 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
3585 assert(b->parent==NULL || b->parent->stride == b->stride*2);
3587 correlate(s, b, b->ibuf, b->stride, 1, 0);
/* Decoder-side path: dequantise every subband and run the inverse DWT. */
3591 for(level=0; level<s->spatial_decomposition_count; level++){
3592 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3593 SubBand *b= &p->band[level][orientation];
3595 dequantize(s, b, b->ibuf, b->stride);
3599 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
/* Lossless data was shifted down before the DWT; shift it back up. */
3600 if(s->qlog == LOSSLESS_QLOG){
3603 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
3607 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
/* NOTE(review): presumably the branch taken when CODEC_FLAG2_MEMC_ONLY is
 * set (the `else` line is not visible). I-frames copy the input samples
 * straight into current_picture; the other visible path zeroes the buffer
 * and calls predict_plane(). */
3610 if(pict->pict_type == AV_PICTURE_TYPE_I){
3613 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
3614 pict->data[plane_index][y*pict->linesize[plane_index] + x];
3618 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
3619 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
/* With CODEC_FLAG_PSNR the difference between current_picture and the input
 * is measured and stored per plane. NOTE(review): the accumulation of
 * `error` from d is not visible. */
3622 if(s->avctx->flags&CODEC_FLAG_PSNR){
3625 if(pict->data[plane_index]) //FIXME gray hack
3628 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3632 s->avctx->error[plane_index] += error;
3633 s->current_picture.error[plane_index] = error;
3638 update_last_header_values(s);
3640 release_buffer(avctx);
/* Record per-frame metadata and bit counts. */
3642 s->current_picture.coded_picture_number = avctx->frame_number;
3643 s->current_picture.pict_type = pict->pict_type;
3644 s->current_picture.quality = pict->quality;
3645 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
/* Texture bits are whatever remains after misc and mv bits. */
3646 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
3647 s->m.current_picture.f.display_picture_number =
3648 s->m.current_picture.f.coded_picture_number = avctx->frame_number;
3649 s->m.current_picture.f.quality = pict->quality;
3650 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
/* NOTE(review): the enclosing condition and the statement guarded by this
 * check are not visible in this excerpt. */
3652 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
3654 if(avctx->flags&CODEC_FLAG_PASS1)
3655 ff_write_pass1_stats(&s->m);
3656 s->m.last_pict_type = s->m.pict_type;
/* Mirror the bit statistics into the public codec context. */
3657 avctx->frame_bits = s->m.frame_bits;
3658 avctx->mv_bits = s->m.mv_bits;
3659 avctx->misc_bits = s->m.misc_bits;
3660 avctx->p_tex_bits = s->m.p_tex_bits;
/* Terminate the range coder and return its result. */
3664 return ff_rac_terminate(c);
/**
 * Encoder close callback (registered as .close in ff_snow_encoder).
 *
 * Visible work: hands s->input_picture back through avctx->release_buffer()
 * when its first data pointer is set, and frees avctx->stats_out.
 *
 * NOTE(review): further teardown calls and the return statement are on lines
 * not visible in this excerpt.
 *
 * @param avctx codec context; avctx->priv_data is used as the SnowContext
 */
3667 static av_cold int encode_end(AVCodecContext *avctx)
3669 SnowContext *s = avctx->priv_data;
/* Only release the input picture if a buffer is actually attached. */
3672 if (s->input_picture.data[0])
3673 avctx->release_buffer(avctx, &s->input_picture);
3674 av_free(avctx->stats_out);
/**
 * Codec descriptor for the Snow video encoder: ties the codec id
 * CODEC_ID_SNOW to the encode_init / encode_frame / encode_end callbacks and
 * sizes the private context as a SnowContext.
 *
 * NOTE(review): some initializer entries and the closing brace are on lines
 * not visible in this excerpt.
 */
3679 AVCodec ff_snow_encoder = {
3681 .type = AVMEDIA_TYPE_VIDEO,
3682 .id = CODEC_ID_SNOW,
3683 .priv_data_size = sizeof(SnowContext),
3684 .init = encode_init,
3685 .encode = encode_frame,
3686 .close = encode_end,
3687 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
3697 #include "libavutil/lfg.h"
3698 #include "libavutil/mathematics.h"
3703 int buffer[2][width*height];
3707 s.spatial_decomposition_count=6;
3708 s.spatial_decomposition_type=1;
3710 av_lfg_init(&prng, 1);
3712 printf("testing 5/3 DWT\n");
3713 for(i=0; i<width*height; i++)
3714 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
3716 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3717 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3719 for(i=0; i<width*height; i++)
3720 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
3722 printf("testing 9/7 DWT\n");
3723 s.spatial_decomposition_type=0;
3724 for(i=0; i<width*height; i++)
3725 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
3727 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3728 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3730 for(i=0; i<width*height; i++)
3731 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
3734 int level, orientation, x, y;
3735 int64_t errors[8][4];
3738 memset(errors, 0, sizeof(errors));
3739 s.spatial_decomposition_count=3;
3740 s.spatial_decomposition_type=0;
3741 for(level=0; level<s.spatial_decomposition_count; level++){
3742 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3743 int w= width >> (s.spatial_decomposition_count-level);
3744 int h= height >> (s.spatial_decomposition_count-level);
3745 int stride= width << (s.spatial_decomposition_count-level);
3746 DWTELEM *buf= buffer[0];
3749 if(orientation&1) buf+=w;
3750 if(orientation>1) buf+=stride>>1;
3752 memset(buffer[0], 0, sizeof(int)*width*height);
3753 buf[w/2 + h/2*stride]= 256*256;
3754 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3755 for(y=0; y<height; y++){
3756 for(x=0; x<width; x++){
3757 int64_t d= buffer[0][x + y*width];
3759 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
3761 if(FFABS(height/2-y)<9 && level==2) printf("\n");
3763 error= (int)(sqrt(error)+0.5);
3764 errors[level][orientation]= error;
3765 if(g) g=av_gcd(g, error);
3769 printf("static int const visual_weight[][4]={\n");
3770 for(level=0; level<s.spatial_decomposition_count; level++){
3772 for(orientation=0; orientation<4; orientation++){
3773 printf("%8"PRId64",", errors[level][orientation]/g);
3780 int w= width >> (s.spatial_decomposition_count-level);
3781 //int h= height >> (s.spatial_decomposition_count-level);
3782 int stride= width << (s.spatial_decomposition_count-level);
3783 DWTELEM *buf= buffer[0];
3789 memset(buffer[0], 0, sizeof(int)*width*height);
3790 for(y=0; y<height; y++){
3791 for(x=0; x<width; x++){
3792 int tab[4]={0,2,3,1};
3793 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3796 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3797 for(y=0; y<height; y++){
3798 for(x=0; x<width; x++){
3799 int64_t d= buffer[0][x + y*width];
3801 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
3803 if(FFABS(height/2-y)<9) printf("\n");