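cosmetics: Add some whitespace for better readability.
(Note: this line appears to be only the subject of the newest commit in the compared range. The diff below is not whitespace-only: it rewords the LGPL header, adds IDWTELEM buffers and the inv_lift/inv_liftS helpers, removes the lift5 and DWT_X code paths, and renames ABS/always_inline to FFABS/av_always_inline.)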

[ffmpeg] / libavcodec / snow.c
diff --git a/libavcodec/snow.c b/libavcodec/snow.c

index f4b9ee85f0b7efa178c2edc76c666f1593bd8985..77fa61a3b19e07fbb85b9241eb1fb7250b7b6e4f 100644 (file)
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -1,23 +1,24 @@
  /*
   * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
   *
- * This library is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
   *
- * This library is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
  #include "avcodec.h"
-#include "common.h"
  #include "dsputil.h"
  #include "snow.h"
  
@@ -392,6 +393,7 @@ static const BlockNode null_block= { //FIXME add border maybe
  
  #define LOG2_MB_SIZE 4
  #define MB_SIZE (1<<LOG2_MB_SIZE)
+#define ENCODER_EXTRA_BITS 4
  
  typedef struct x_and_coeff{
      int16_t x;
@@ -405,6 +407,7 @@ typedef struct SubBand{
      int height;
      int qlog;                                   ///< log(qscale)/log[2^(1/6)]
      DWTELEM *buf;
+    IDWTELEM *ibuf;
      int buf_x_offset;
      int buf_y_offset;
      int stride_line; ///< Stride measured in lines, not pixels.
@@ -420,7 +423,7 @@ typedef struct Plane{
  }Plane;
  
  typedef struct SnowContext{
-//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
+//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
  
      AVCodecContext *avctx;
      RangeCoder c;
@@ -437,6 +440,7 @@ typedef struct SnowContext{
      int always_reset;
      int version;
      int spatial_decomposition_type;
+    int last_spatial_decomposition_type;
      int temporal_decomposition_type;
      int spatial_decomposition_count;
      int temporal_decomposition_count;
@@ -445,20 +449,25 @@ typedef struct SnowContext{
      int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
      uint32_t *ref_scores[MAX_REF_FRAMES];
      DWTELEM *spatial_dwt_buffer;
+    IDWTELEM *spatial_idwt_buffer;
      int colorspace_type;
      int chroma_h_shift;
      int chroma_v_shift;
      int spatial_scalability;
      int qlog;
+    int last_qlog;
      int lambda;
      int lambda2;
      int pass1_rc;
      int mv_scale;
+    int last_mv_scale;
      int qbias;
+    int last_qbias;
  #define QBIAS_SHIFT 3
      int b_width;
      int b_height;
      int block_max_depth;
+    int last_block_max_depth;
      Plane plane[MAX_PLANES];
      BlockNode *block;
  #define ME_CACHE_SIZE 1024
@@ -466,14 +475,14 @@ typedef struct SnowContext{
      int me_cache_generation;
      slice_buffer sb;
  
-    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
+    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
  }SnowContext;
  
  typedef struct {
-    DWTELEM *b0;
-    DWTELEM *b1;
-    DWTELEM *b2;
-    DWTELEM *b3;
+    IDWTELEM *b0;
+    IDWTELEM *b1;
+    IDWTELEM *b2;
+    IDWTELEM *b3;
      int y;
  } dwt_compose_t;
  
@@ -482,7 +491,7 @@ typedef struct {
  
  static void iterative_me(SnowContext *s);
  
-static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
+static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
  {
      int i;
  
@@ -490,21 +499,21 @@ static void slice_buffer_init(slice_buffer * buf, int line_count, int max_alloca
      buf->line_count = line_count;
      buf->line_width = line_width;
      buf->data_count = max_allocated_lines;
-    buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
-    buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
+    buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
+    buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
  
      for (i = 0; i < max_allocated_lines; i++)
      {
-      buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
+        buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
      }
  
      buf->data_stack_top = max_allocated_lines - 1;
  }
  
-static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
+static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
  {
      int offset;
-    DWTELEM * buffer;
+    IDWTELEM * buffer;
  
  //  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
  
@@ -526,7 +535,7 @@ static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
  static void slice_buffer_release(slice_buffer * buf, int line)
  {
      int offset;
-    DWTELEM * buffer;
+    IDWTELEM * buffer;
  
      assert(line >= 0 && line < buf->line_count);
      assert(buf->line[line]);
@@ -588,7 +597,7 @@ static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signe
      int i;
  
      if(v){
-        const int a= ABS(v);
+        const int a= FFABS(v);
          const int e= av_log2(a);
  #if 1
          const int el= FFMIN(e, 10);
@@ -707,7 +716,7 @@ static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
      return v;
  }
  
-static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
+static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
      const int mirror_left= !highpass;
      const int mirror_right= (width&1) ^ highpass;
      const int w= (width>>1) - 1 + (highpass & width);
@@ -729,47 +738,37 @@ static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst
      }
  }
  
-#ifndef lift5
-static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
+static av_always_inline void inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
      const int mirror_left= !highpass;
      const int mirror_right= (width&1) ^ highpass;
      const int w= (width>>1) - 1 + (highpass & width);
      int i;
  
+#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
      if(mirror_left){
-        int r= 3*2*ref[0];
-        r += r>>4;
-        r += r>>8;
-        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
+        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
          dst += dst_step;
          src += src_step;
      }
  
      for(i=0; i<w; i++){
-        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
-        r += r>>4;
-        r += r>>8;
-        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
+        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
      }
  
      if(mirror_right){
-        int r= 3*2*ref[w*ref_step];
-        r += r>>4;
-        r += r>>8;
-        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
+        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
      }
  }
-#endif
  
  #ifndef liftS
-static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
+static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
      const int mirror_left= !highpass;
      const int mirror_right= (width&1) ^ highpass;
      const int w= (width>>1) - 1 + (highpass & width);
      int i;
  
      assert(shift == 4);
-#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
+#define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
      if(mirror_left){
          dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
          dst += dst_step;
@@ -784,235 +783,29 @@ static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds
          dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
      }
  }
-#endif
-
-
-static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
-    int x, i;
-
-    for(x=start; x<width; x+=2){
-        int64_t sum=0;
-
-        for(i=0; i<n; i++){
-            int x2= x + 2*i - n + 1;
-            if     (x2<     0) x2= -x2;
-            else if(x2>=width) x2= 2*width-x2-2;
-            sum += coeffs[i]*(int64_t)dst[x2];
-        }
-        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
-        else        dst[x] += (sum + (1<<shift)/2)>>shift;
-    }
-}
-
-static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
-    int x, y, i;
-    for(y=start; y<height; y+=2){
-        for(x=0; x<width; x++){
-            int64_t sum=0;
-
-            for(i=0; i<n; i++){
-                int y2= y + 2*i - n + 1;
-                if     (y2<      0) y2= -y2;
-                else if(y2>=height) y2= 2*height-y2-2;
-                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
-            }
-            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
-            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
-        }
-    }
-}
-
-#define SCALEX 1
-#define LX0 0
-#define LX1 1
-
-#if 0 // more accurate 9/7
-#define N1 2
-#define SHIFT1 14
-#define COEFFS1 (int[]){-25987,-25987}
-#define N2 2
-#define SHIFT2 19
-#define COEFFS2 (int[]){-27777,-27777}
-#define N3 2
-#define SHIFT3 15
-#define COEFFS3 (int[]){28931,28931}
-#define N4 2
-#define SHIFT4 15
-#define COEFFS4 (int[]){14533,14533}
-#elif 1 // 13/7 CRF
-#define N1 4
-#define SHIFT1 4
-#define COEFFS1 (int[]){1,-9,-9,1}
-#define N2 4
-#define SHIFT2 4
-#define COEFFS2 (int[]){-1,5,5,-1}
-#define N3 0
-#define SHIFT3 1
-#define COEFFS3 NULL
-#define N4 0
-#define SHIFT4 1
-#define COEFFS4 NULL
-#elif 1 // 3/5
-#define LX0 1
-#define LX1 0
-#define SCALEX 0.5
-#define N1 2
-#define SHIFT1 1
-#define COEFFS1 (int[]){1,1}
-#define N2 2
-#define SHIFT2 2
-#define COEFFS2 (int[]){-1,-1}
-#define N3 0
-#define SHIFT3 0
-#define COEFFS3 NULL
-#define N4 0
-#define SHIFT4 0
-#define COEFFS4 NULL
-#elif 1 // 11/5
-#define N1 0
-#define SHIFT1 1
-#define COEFFS1 NULL
-#define N2 2
-#define SHIFT2 2
-#define COEFFS2 (int[]){-1,-1}
-#define N3 2
-#define SHIFT3 0
-#define COEFFS3 (int[]){-1,-1}
-#define N4 4
-#define SHIFT4 7
-#define COEFFS4 (int[]){-5,29,29,-5}
-#define SCALEX 4
-#elif 1 // 9/7 CDF
-#define N1 2
-#define SHIFT1 7
-#define COEFFS1 (int[]){-203,-203}
-#define N2 2
-#define SHIFT2 12
-#define COEFFS2 (int[]){-217,-217}
-#define N3 2
-#define SHIFT3 7
-#define COEFFS3 (int[]){113,113}
-#define N4 2
-#define SHIFT4 9
-#define COEFFS4 (int[]){227,227}
-#define SCALEX 1
-#elif 1 // 7/5 CDF
-#define N1 0
-#define SHIFT1 1
-#define COEFFS1 NULL
-#define N2 2
-#define SHIFT2 2
-#define COEFFS2 (int[]){-1,-1}
-#define N3 2
-#define SHIFT3 0
-#define COEFFS3 (int[]){-1,-1}
-#define N4 2
-#define SHIFT4 4
-#define COEFFS4 (int[]){3,3}
-#elif 1 // 9/7 MN
-#define N1 4
-#define SHIFT1 4
-#define COEFFS1 (int[]){1,-9,-9,1}
-#define N2 2
-#define SHIFT2 2
-#define COEFFS2 (int[]){1,1}
-#define N3 0
-#define SHIFT3 1
-#define COEFFS3 NULL
-#define N4 0
-#define SHIFT4 1
-#define COEFFS4 NULL
-#else // 13/7 CRF
-#define N1 4
-#define SHIFT1 4
-#define COEFFS1 (int[]){1,-9,-9,1}
-#define N2 4
-#define SHIFT2 4
-#define COEFFS2 (int[]){-1,5,5,-1}
-#define N3 0
-#define SHIFT3 1
-#define COEFFS3 NULL
-#define N4 0
-#define SHIFT4 1
-#define COEFFS4 NULL
-#endif
-static void horizontal_decomposeX(DWTELEM *b, int width){
-    DWTELEM temp[width];
-    const int width2= width>>1;
-    const int w2= (width+1)>>1;
-    int x;
-
-    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
-    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
-    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
-    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
-
-    for(x=0; x<width2; x++){
-        temp[x   ]= b[2*x    ];
-        temp[x+w2]= b[2*x + 1];
-    }
-    if(width&1)
-        temp[x   ]= b[2*x    ];
-    memcpy(b, temp, width*sizeof(int));
-}
-
-static void horizontal_composeX(DWTELEM *b, int width){
-    DWTELEM temp[width];
-    const int width2= width>>1;
-    int x;
-    const int w2= (width+1)>>1;
-
-    memcpy(temp, b, width*sizeof(int));
-    for(x=0; x<width2; x++){
-        b[2*x    ]= temp[x   ];
-        b[2*x + 1]= temp[x+w2];
-    }
-    if(width&1)
-        b[2*x    ]= temp[x   ];
-
-    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
-    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
-    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
-    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
-}
-
-static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
-    int x, y;
-
-    for(y=0; y<height; y++){
-        for(x=0; x<width; x++){
-            buffer[y*stride + x] *= SCALEX;
-        }
-    }
+static av_always_inline void inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
+    const int mirror_left= !highpass;
+    const int mirror_right= (width&1) ^ highpass;
+    const int w= (width>>1) - 1 + (highpass & width);
+    int i;
  
-    for(y=0; y<height; y++){
-        horizontal_decomposeX(buffer + y*stride, width);
+    assert(shift == 4);
+#define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
+    if(mirror_left){
+        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
+        dst += dst_step;
+        src += src_step;
      }
  
-    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
-    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
-    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
-    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
-}
-
-static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
-    int x, y;
-
-    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
-    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
-    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
-    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
-
-    for(y=0; y<height; y++){
-        horizontal_composeX(buffer + y*stride, width);
+    for(i=0; i<w; i++){
+        dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
      }
  
-    for(y=0; y<height; y++){
-        for(x=0; x<width; x++){
-            buffer[y*stride + x] /= SCALEX;
-        }
+    if(mirror_right){
+        dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
      }
  }
+#endif
  
  static void horizontal_decompose53i(DWTELEM *b, int width){
      DWTELEM temp[width];
@@ -1107,9 +900,9 @@ static void horizontal_decompose97i(DWTELEM *b, int width){
      DWTELEM temp[width];
      const int w2= (width+1)>>1;
  
-    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
-    liftS(temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
-    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
+    lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
+    liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
+    lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
      lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
  }
  
@@ -1126,14 +919,7 @@ static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int w
      int i;
  
      for(i=0; i<width; i++){
-#ifdef lift5
          b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
-#else
-        int r= 3*(b0[i] + b2[i]);
-        r+= r>>4;
-        r+= r>>8;
-        b1[i] += (r+W_CO)>>W_CS;
-#endif
      }
  }
  
@@ -1144,7 +930,7 @@ static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int w
  #ifdef liftS
          b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
  #else
-        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
+        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
  #endif
      }
  }
@@ -1199,13 +985,12 @@ void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type
          switch(type){
          case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
          case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
-        case DWT_X: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
          }
      }
  }
  
-static void horizontal_compose53i(DWTELEM *b, int width){
-    DWTELEM temp[width];
+static void horizontal_compose53i(IDWTELEM *b, int width){
+    IDWTELEM temp[width];
      const int width2= width>>1;
      const int w2= (width+1)>>1;
      int x;
@@ -1240,8 +1025,8 @@ static void horizontal_compose53i(DWTELEM *b, int width){
      b[width -1] = A3;
      b[width2-1] = A2;
  #else
-    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
-    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
+    inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
+    inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
  #endif
      for(x=0; x<width2; x++){
          b[2*x    ]= temp[x   ];
@@ -1251,7 +1036,7 @@ static void horizontal_compose53i(DWTELEM *b, int width){
          b[2*x    ]= temp[x   ];
  }
  
-static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
      int i;
  
      for(i=0; i<width; i++){
@@ -1259,7 +1044,7 @@ static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int wid
      }
  }
  
-static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
      int i;
  
      for(i=0; i<width; i++){
@@ -1273,7 +1058,7 @@ static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * s
      cs->y = -1;
  }
  
-static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
+static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
      cs->b0 = buffer + mirror(-1-1, height-1)*stride;
      cs->b1 = buffer + mirror(-1  , height-1)*stride;
      cs->y = -1;
@@ -1282,10 +1067,10 @@ static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int heig
  static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
      int y= cs->y;
  
-    DWTELEM *b0= cs->b0;
-    DWTELEM *b1= cs->b1;
-    DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
-    DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
+    IDWTELEM *b0= cs->b0;
+    IDWTELEM *b1= cs->b1;
+    IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
+    IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
  
  {START_TIMER
          if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
@@ -1302,12 +1087,12 @@ STOP_TIMER("horizontal_compose53i")}
      cs->y += 2;
  }
  
-static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
+static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
      int y= cs->y;
-    DWTELEM *b0= cs->b0;
-    DWTELEM *b1= cs->b1;
-    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
-    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
+    IDWTELEM *b0= cs->b0;
+    IDWTELEM *b1= cs->b1;
+    IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
+    IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
  
  {START_TIMER
          if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
@@ -1324,7 +1109,7 @@ STOP_TIMER("horizontal_compose53i")}
      cs->y += 2;
  }
  
-static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
+static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
      dwt_compose_t cs;
      spatial_compose53i_init(&cs, buffer, height, stride);
      while(cs.y <= height)
@@ -1332,17 +1117,17 @@ static void spatial_compose53i(DWTELEM *buffer, int width, int height, int strid
  }
  
  
-void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
-    DWTELEM temp[width];
+void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
+    IDWTELEM temp[width];
      const int w2= (width+1)>>1;
  
-    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
-    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
-    liftS(b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
-    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
+    inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
+    inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
+    inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
+    inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
  }
  
-static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
      int i;
  
      for(i=0; i<width; i++){
@@ -1350,22 +1135,15 @@ static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int wid
      }
  }
  
-static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
      int i;
  
      for(i=0; i<width; i++){
-#ifdef lift5
          b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
-#else
-        int r= 3*(b0[i] + b2[i]);
-        r+= r>>4;
-        r+= r>>8;
-        b1[i] -= (r+W_CO)>>W_CS;
-#endif
      }
  }
  
-static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
      int i;
  
      for(i=0; i<width; i++){
@@ -1377,7 +1155,7 @@ static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int wid
      }
  }
  
-static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
      int i;
  
      for(i=0; i<width; i++){
@@ -1385,22 +1163,12 @@ static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int wid
      }
  }
  
-void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
+void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
      int i;
  
      for(i=0; i<width; i++){
-#ifndef lift5
-        int r;
-#endif
          b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
-#ifdef lift5
          b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
-#else
-        r= 3*(b2[i] + b4[i]);
-        r+= r>>4;
-        r+= r>>8;
-        b3[i] -= (r+W_CO)>>W_CS;
-#endif
  #ifdef liftS
          b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
  #else
@@ -1418,7 +1186,7 @@ static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * s
      cs->y = -3;
  }
  
-static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
+static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
      cs->b0 = buffer + mirror(-3-1, height-1)*stride;
      cs->b1 = buffer + mirror(-3  , height-1)*stride;
      cs->b2 = buffer + mirror(-3+1, height-1)*stride;
@@ -1429,12 +1197,12 @@ static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int heig
  static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
      int y = cs->y;
  
-    DWTELEM *b0= cs->b0;
-    DWTELEM *b1= cs->b1;
-    DWTELEM *b2= cs->b2;
-    DWTELEM *b3= cs->b3;
-    DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
-    DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
+    IDWTELEM *b0= cs->b0;
+    IDWTELEM *b1= cs->b1;
+    IDWTELEM *b2= cs->b2;
+    IDWTELEM *b3= cs->b3;
+    IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
+    IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
  
  {START_TIMER
      if(y>0 && y+4<height){
@@ -1461,14 +1229,14 @@ STOP_TIMER("horizontal_compose97i")}}
      cs->y += 2;
  }
  
-static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
+static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
      int y = cs->y;
-    DWTELEM *b0= cs->b0;
-    DWTELEM *b1= cs->b1;
-    DWTELEM *b2= cs->b2;
-    DWTELEM *b3= cs->b3;
-    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
-    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
+    IDWTELEM *b0= cs->b0;
+    IDWTELEM *b1= cs->b1;
+    IDWTELEM *b2= cs->b2;
+    IDWTELEM *b3= cs->b3;
+    IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
+    IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
  
  {START_TIMER
          if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
@@ -1491,7 +1259,7 @@ STOP_TIMER("horizontal_compose97i")}}
      cs->y += 2;
  }
  
-static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
+static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
      dwt_compose_t cs;
      spatial_compose97i_init(&cs, buffer, height, stride);
      while(cs.y <= height)
@@ -1504,26 +1272,21 @@ static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb,
          switch(type){
          case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
          case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
-        /* not slicified yet */
-        case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
-          av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
          }
      }
  }
  
-static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
+static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
      int level;
      for(level=decomposition_count-1; level>=0; level--){
          switch(type){
          case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
          case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
-        /* not slicified yet */
-        case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
          }
      }
  }
  
-static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
+static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
      const int support = type==1 ? 3 : 5;
      int level;
      if(type==2) return;
@@ -1535,7 +1298,6 @@ static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width,
                      break;
              case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
                      break;
-            case DWT_X: break;
              }
          }
      }
@@ -1553,27 +1315,20 @@ static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, s
                      break;
              case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
                      break;
-            case DWT_X: break;
              }
          }
      }
  }
  
-static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
-    if(type==2){
-        int level;
-        for(level=decomposition_count-1; level>=0; level--)
-            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
-    }else{
+static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
          dwt_compose_t cs[MAX_DECOMPOSITIONS];
          int y;
          ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
          for(y=0; y<height; y+=4)
              ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
-    }
  }
  
-static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
+static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
      const int w= b->width;
      const int h= b->height;
      int x, y;
@@ -1664,7 +1419,7 @@ static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELE
                          p= parent[px + py*2*stride];
                  }
                  if(/*ll|*/l|lt|t|rt|p){
-                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
+                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
  
                      put_rac(&s->c, &b->state[0][context], !!v);
                  }else{
@@ -1680,11 +1435,11 @@ static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELE
                      }
                  }
                  if(v){
-                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
-                    int l2= 2*ABS(l) + (l<0);
-                    int t2= 2*ABS(t) + (t<0);
+                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
+                    int l2= 2*FFABS(l) + (l<0);
+                    int t2= 2*FFABS(t) + (t<0);
  
-                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
+                    put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
                      put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
                  }
              }
@@ -1693,7 +1448,7 @@ static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELE
      return 0;
  }
  
-static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
+static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
  //    encode_subband_qtree(s, b, src, parent, stride, orientation);
  //    encode_subband_z0run(s, b, src, parent, stride, orientation);
      return encode_subband_c0run(s, b, src, parent, stride, orientation);
@@ -1747,7 +1502,7 @@ static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, i
                      }
                  }
                  if(/*ll|*/l|lt|t|rt|p){
-                    int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
+                    int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
  
                      v=get_rac(&s->c, &b->state[0][context]);
                      if(v){
@@ -1803,14 +1558,14 @@ static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, i
  static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
      const int w= b->width;
      int y;
-    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
+    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
      int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
      int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
      int new_index = 0;
  
      START_TIMER
  
-    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
+    if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
          qadd= 0;
          qmul= 1<<QEXPSHIFT;
      }
@@ -1823,8 +1578,8 @@ static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, sli
      for(y=start_y; y<h; y++){
          int x = 0;
          int v;
-        DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
-        memset(line, 0, b->width*sizeof(DWTELEM));
+        IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
+        memset(line, 0, b->width*sizeof(IDWTELEM));
          v = b->x_coeff[new_index].coeff;
          x = b->x_coeff[new_index++].x;
          while(x < w)
@@ -1847,7 +1602,7 @@ static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, sli
      return;
  }
  
-static void reset_contexts(SnowContext *s){
+static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
      int plane_index, level, orientation;
  
      for(plane_index=0; plane_index<3; plane_index++){
@@ -1900,7 +1655,7 @@ static int pix_sum(uint8_t * pix, int line_size, int w)
  static int pix_norm1(uint8_t * pix, int line_size, int w)
  {
      int s, i, j;
-    uint32_t *sq = squareTbl + 256;
+    uint32_t *sq = ff_squareTbl + 256;
  
      s = 0;
      for (i = 0; i < w; i++) {
@@ -1952,18 +1707,18 @@ static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3
  }
  
  static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
-                           BlockNode *left, BlockNode *top, BlockNode *tr){
+                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
      if(s->ref_frames == 1){
          *mx = mid_pred(left->mx, top->mx, tr->mx);
          *my = mid_pred(left->my, top->my, tr->my);
      }else{
          const int *scale = scale_mv_ref[ref];
-        *mx = mid_pred(left->mx * scale[left->ref] + 128 >>8,
-                       top ->mx * scale[top ->ref] + 128 >>8,
-                       tr  ->mx * scale[tr  ->ref] + 128 >>8);
-        *my = mid_pred(left->my * scale[left->ref] + 128 >>8,
-                       top ->my * scale[top ->ref] + 128 >>8,
-                       tr  ->my * scale[tr  ->ref] + 128 >>8);
+        *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
+                       (top ->mx * scale[top ->ref] + 128) >>8,
+                       (tr  ->mx * scale[tr  ->ref] + 128) >>8);
+        *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
+                       (top ->my * scale[top ->ref] + 128) >>8,
+                       (tr  ->my * scale[tr  ->ref] + 128) >>8);
      }
  }
  
@@ -1983,7 +1738,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
      RangeCoder pc, ic;
      uint8_t *pbbak= s->c.bytestream;
      uint8_t *pbbak_start= s->c.bytestream_start;
-    int score, score2, iscore, i_len, p_len, block_s, sum;
+    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
      const int w= s->b_width  << s->block_max_depth;
      const int h= s->b_height << s->block_max_depth;
      const int rem_depth= s->block_max_depth - level;
@@ -1991,12 +1746,12 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
      const int block_w= 1<<(LOG2_MB_SIZE - level);
      int trx= (x+1)<<rem_depth;
      int try= (y+1)<<rem_depth;
-    BlockNode *left  = x ? &s->block[index-1] : &null_block;
-    BlockNode *top   = y ? &s->block[index-w] : &null_block;
-    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
-    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
-    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
-    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
+    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
+    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
+    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
      int pl = left->color[0];
      int pcb= left->color[1];
      int pcr= left->color[2];
@@ -2014,8 +1769,8 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
      const int shift= 1+qpel;
      MotionEstContext *c= &s->m.me;
      int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
-    int mx_context= av_log2(2*ABS(left->mx - top->mx));
-    int my_context= av_log2(2*ABS(left->my - top->my));
+    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
+    int my_context= av_log2(2*FFABS(left->my - top->my));
      int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
      int ref, best_ref, ref_score, ref_mx, ref_my;
  
@@ -2044,10 +1799,10 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
      s->m.mb_stride=2;
      s->m.mb_x=
      s->m.mb_y= 0;
-    s->m.me.skip= 0;
+    c->skip= 0;
  
-    assert(s->m.me.  stride ==   stride);
-    assert(s->m.me.uvstride == uvstride);
+    assert(c->  stride ==   stride);
+    assert(c->uvstride == uvstride);
  
      c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
      c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
@@ -2091,7 +1846,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
          assert(ref_my >= c->ymin);
          assert(ref_my <= c->ymax);
  
-        ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
+        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
          ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
          ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
          if(s->ref_mvs[ref]){
@@ -2106,9 +1861,10 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
              my= ref_my;
          }
      }
-    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
+    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
  
    //  subpel search
+    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
      pc= s->c;
      pc.bytestream_start=
      pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
@@ -2123,10 +1879,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
      put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
      put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
      p_len= pc.bytestream - pc.bytestream_start;
-    score += (s->lambda2*(p_len*8
-              + (pc.outstanding_count - s->c.outstanding_count)*8
-              + (-av_log2(pc.range)    + av_log2(s->c.range))
-             ))>>FF_LAMBDA_SHIFT;
+    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
  
      block_s= block_w*block_w;
      sum = pix_sum(current_data[0], stride, block_w);
@@ -2152,10 +1905,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
      put_symbol(&ic, &i_state[64], cb-pcb, 1);
      put_symbol(&ic, &i_state[96], cr-pcr, 1);
      i_len= ic.bytestream - ic.bytestream_start;
-    iscore += (s->lambda2*(i_len*8
-              + (ic.outstanding_count - s->c.outstanding_count)*8
-              + (-av_log2(ic.range)    + av_log2(s->c.range))
-             ))>>FF_LAMBDA_SHIFT;
+    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
  
  //    assert(score==256*256*256*64-1);
      assert(iscore < 255*255*256 + s->lambda2*10);
@@ -2204,7 +1954,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
      }
  }
  
-static always_inline int same_block(BlockNode *a, BlockNode *b){
+static av_always_inline int same_block(BlockNode *a, BlockNode *b){
      if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
          return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
      }else{
@@ -2218,17 +1968,17 @@ static void encode_q_branch2(SnowContext *s, int level, int x, int y){
      const int index= (x + y*w) << rem_depth;
      int trx= (x+1)<<rem_depth;
      BlockNode *b= &s->block[index];
-    BlockNode *left  = x ? &s->block[index-1] : &null_block;
-    BlockNode *top   = y ? &s->block[index-w] : &null_block;
-    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
-    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
+    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
      int pl = left->color[0];
      int pcb= left->color[1];
      int pcr= left->color[2];
      int pmx, pmy;
      int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
-    int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 16*!!b->ref;
-    int my_context= av_log2(2*ABS(left->my - top->my)) + 16*!!b->ref;
+    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
+    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
      int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
  
      if(s->keyframe){
@@ -2271,10 +2021,10 @@ static void decode_q_branch(SnowContext *s, int level, int x, int y){
      const int rem_depth= s->block_max_depth - level;
      const int index= (x + y*w) << rem_depth;
      int trx= (x+1)<<rem_depth;
-    BlockNode *left  = x ? &s->block[index-1] : &null_block;
-    BlockNode *top   = y ? &s->block[index-w] : &null_block;
-    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
-    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
+    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
      int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
  
      if(s->keyframe){
@@ -2283,16 +2033,14 @@ static void decode_q_branch(SnowContext *s, int level, int x, int y){
      }
  
      if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
-        int type;
+        int type, mx, my;
          int l = left->color[0];
          int cb= left->color[1];
          int cr= left->color[2];
-        int mx= mid_pred(left->mx, top->mx, tr->mx);
-        int my= mid_pred(left->my, top->my, tr->my);
          int ref = 0;
          int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
-        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
-        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
+        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
+        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
  
          type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
  
@@ -2351,7 +2099,7 @@ static void decode_blocks(SnowContext *s){
      }
  }
  
-static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
+static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
      int x, y;
  START_TIMER
      for(y=0; y < b_h+5; y++){
@@ -2421,7 +2169,7 @@ STOP_TIMER("mc_block")
  }
  
  #define mca(dx,dy,b_w)\
-static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
+static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
      uint8_t tmp[stride*(b_w+5)];\
      assert(h==b_w);\
      mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
@@ -2516,16 +2264,16 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, i
      }
  }
  
-void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                                int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
      int y, x;
-    DWTELEM * dst;
+    IDWTELEM * dst;
      for(y=0; y<b_h; y++){
-        //FIXME ugly missue of obmc_stride
-        uint8_t *obmc1= obmc + y*obmc_stride;
-        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
-        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
-        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+        //FIXME ugly misuse of obmc_stride
+        const uint8_t *obmc1= obmc + y*obmc_stride;
+        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
          dst = slice_buffer_get_line(sb, src_y + y);
          for(x=0; x<b_w; x++){
              int v=   obmc1[x] * block[3][x + y*src_stride]
@@ -2535,7 +2283,6 @@ void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * *
  
              v <<= 8 - LOG2_OBMC_MAX;
              if(FRAC_BITS != 8){
-                v += 1<<(7 - FRAC_BITS);
                  v >>= 8 - FRAC_BITS;
              }
              if(add){
@@ -2551,7 +2298,7 @@ void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * *
  }
  
  //FIXME name clenup (b_w, block_w, b_width stuff)
-static always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
+static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
      const int b_width = s->b_width  << s->block_max_depth;
      const int b_height= s->b_height << s->block_max_depth;
      const int b_stride= b_width;
@@ -2681,11 +2428,11 @@ assert(src_stride > 2*MB_SIZE + 5);
          STOP_TIMER("inner_add_yblock")
      }else
      for(y=0; y<b_h; y++){
-        //FIXME ugly missue of obmc_stride
-        uint8_t *obmc1= obmc + y*obmc_stride;
-        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
-        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
-        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+        //FIXME ugly misuse of obmc_stride
+        const uint8_t *obmc1= obmc + y*obmc_stride;
+        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
          for(x=0; x<b_w; x++){
              int v=   obmc1[x] * block[3][x + y*src_stride]
                      +obmc2[x] * block[2][x + y*src_stride]
@@ -2694,7 +2441,6 @@ assert(src_stride > 2*MB_SIZE + 5);
  
              v <<= 8 - LOG2_OBMC_MAX;
              if(FRAC_BITS != 8){
-                v += 1<<(7 - FRAC_BITS);
                  v >>= 8 - FRAC_BITS;
              }
              if(add){
@@ -2710,7 +2456,7 @@ assert(src_stride > 2*MB_SIZE + 5);
  #endif
  }
  
-static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
+static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
      Plane *p= &s->plane[plane_index];
      const int mb_w= s->b_width  << s->block_max_depth;
      const int mb_h= s->b_height << s->block_max_depth;
@@ -2733,7 +2479,7 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer *
              for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
              {
  //                DWTELEM * line = slice_buffer_get_line(sb, y);
-                DWTELEM * line = sb->line[y];
+                IDWTELEM * line = sb->line[y];
                  for(x=0; x<w; x++)
                  {
  //                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
@@ -2747,7 +2493,7 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer *
              for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
              {
  //                DWTELEM * line = slice_buffer_get_line(sb, y);
-                DWTELEM * line = sb->line[y];
+                IDWTELEM * line = sb->line[y];
                  for(x=0; x<w; x++)
                  {
                      line[x] -= 128 << FRAC_BITS;
@@ -2777,7 +2523,7 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer *
      STOP_TIMER("predict_slice")
  }
  
-static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
+static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
      Plane *p= &s->plane[plane_index];
      const int mb_w= s->b_width  << s->block_max_depth;
      const int mb_h= s->b_height << s->block_max_depth;
@@ -2834,7 +2580,7 @@ static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_
      STOP_TIMER("predict_slice")
  }
  
-static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
+static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
      const int mb_h= s->b_height << s->block_max_depth;
      int mb_y;
      for(mb_y=0; mb_y<=mb_h; mb_y++)
@@ -2850,7 +2596,7 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
      const int obmc_stride= plane_index ? block_size : 2*block_size;
      const int ref_stride= s->current_picture.linesize[plane_index];
      uint8_t *src= s-> input_picture.data[plane_index];
-    DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
+    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
      const int b_stride = s->b_width << s->block_max_depth;
      const int w= p->width;
      const int h= p->height;
@@ -2862,7 +2608,7 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
  
      b->type|= BLOCK_INTRA;
      b->color[plane_index]= 0;
-    memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
+    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
  
      for(i=0; i<4; i++){
          int mb_x2= mb_x + (i &1) - 1;
@@ -2893,21 +2639,21 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
      }
      *b= backup;
  
-    return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
+    return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
  }
  
  static inline int get_block_bits(SnowContext *s, int x, int y, int w){
      const int b_stride = s->b_width << s->block_max_depth;
      const int b_height = s->b_height<< s->block_max_depth;
      int index= x + y*b_stride;
-    BlockNode *b     = &s->block[index];
-    BlockNode *left  = x ? &s->block[index-1] : &null_block;
-    BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
-    BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
-    BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
+    const BlockNode *b     = &s->block[index];
+    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
+    const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
+    const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
      int dmx, dmy;
-//  int mx_context= av_log2(2*ABS(left->mx - top->mx));
-//  int my_context= av_log2(2*ABS(left->my - top->my));
+//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
+//  int my_context= av_log2(2*FFABS(left->my - top->my));
  
      if(x<0 || x>=b_stride || y>=b_height)
          return 0;
@@ -2921,15 +2667,15 @@ static inline int get_block_bits(SnowContext *s, int x, int y, int w){
  //FIXME try accurate rate
  //FIXME intra and inter predictors if surrounding blocks arent the same type
      if(b->type & BLOCK_INTRA){
-        return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
-                   + av_log2(2*ABS(left->color[1] - b->color[1]))
-                   + av_log2(2*ABS(left->color[2] - b->color[2])));
+        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
+                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
+                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
      }else{
          pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
          dmx-= b->mx;
          dmy-= b->my;
-        return 2*(1 + av_log2(2*ABS(dmx)) //FIXME kill the 2* can be merged in lambda
-                    + av_log2(2*ABS(dmy))
+        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
+                    + av_log2(2*FFABS(dmy))
                      + av_log2(2*b->ref));
      }
  }
@@ -2938,12 +2684,11 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con
      Plane *p= &s->plane[plane_index];
      const int block_size = MB_SIZE >> s->block_max_depth;
      const int block_w    = plane_index ? block_size/2 : block_size;
-    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
      const int obmc_stride= plane_index ? block_size : 2*block_size;
      const int ref_stride= s->current_picture.linesize[plane_index];
      uint8_t *dst= s->current_picture.data[plane_index];
      uint8_t *src= s->  input_picture.data[plane_index];
-    DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
+    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
      uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
      uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
      const int b_stride = s->b_width << s->block_max_depth;
@@ -2965,11 +2710,15 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con
  
      for(y=y0; y<y1; y++){
          const uint8_t *obmc1= obmc_edged + y*obmc_stride;
-        const DWTELEM *pred1 = pred + y*obmc_stride;
+        const IDWTELEM *pred1 = pred + y*obmc_stride;
          uint8_t *cur1 = cur + y*ref_stride;
          uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
          for(x=x0; x<x1; x++){
+#if FRAC_BITS >= LOG2_OBMC_MAX
              int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
+#else
+            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
+#endif
              v = (v + pred1[x]) >> FRAC_BITS;
              if(v&(~255)) v= ~(v>>31);
              dst1[x] = v;
@@ -3039,9 +2788,8 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
      const int ref_stride= s->current_picture.linesize[plane_index];
      uint8_t *dst= s->current_picture.data[plane_index];
      uint8_t *src= s-> input_picture.data[plane_index];
-    static const DWTELEM zero_dst[4096]; //FIXME
+    static const IDWTELEM zero_dst[4096]; //FIXME
      const int b_stride = s->b_width << s->block_max_depth;
-    const int b_height = s->b_height<< s->block_max_depth;
      const int w= p->width;
      const int h= p->height;
      int distortion= 0;
@@ -3094,7 +2842,7 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
      return distortion + rate*penalty_factor;
  }
  
-static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
+static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
      const int b_stride= s->b_width << s->block_max_depth;
      BlockNode *block= &s->block[mb_x + mb_y * b_stride];
      BlockNode backup= *block;
@@ -3133,12 +2881,12 @@ static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3
  }
  
  /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
-static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
+static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
      int p[2] = {p0, p1};
      return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
  }
  
-static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
+static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
      const int b_stride= s->b_width << s->block_max_depth;
      BlockNode *block= &s->block[mb_x + mb_y * b_stride];
      BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
@@ -3279,7 +3027,7 @@ static void iterative_me(SnowContext *s){
                  for(i=0; i<3; i++)
                      color[i]= get_dc(s, mb_x, mb_y, i);
  
-                // get previous score (cant be cached due to OBMC)
+                // get previous score (cannot be cached due to OBMC)
                  if(pass > 0 && (block->type&BLOCK_INTRA)){
                      int color0[3]= {block->color[0], block->color[1], block->color[2]};
                      check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
@@ -3400,16 +3148,21 @@ static void iterative_me(SnowContext *s){
      }
  }
  
-static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
+static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
      const int level= b->level;
      const int w= b->width;
      const int h= b->height;
-    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
-    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
+    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
+    const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
      int x,y, thres1, thres2;
  //    START_TIMER
  
-    if(s->qlog == LOSSLESS_QLOG) return;
+    if(s->qlog == LOSSLESS_QLOG){
+        for(y=0; y<h; y++)
+            for(x=0; x<w; x++)
+                dst[x + y*stride]= src[x + y*stride];
+        return;
+    }
  
      bias= bias ? 0 : (3*qmul)>>3;
      thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
@@ -3424,15 +3177,15 @@ static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int b
                      if(i>=0){
                          i<<= QEXPSHIFT;
                          i/= qmul; //FIXME optimize
-                        src[x + y*stride]=  i;
+                        dst[x + y*stride]=  i;
                      }else{
                          i= -i;
                          i<<= QEXPSHIFT;
                          i/= qmul; //FIXME optimize
-                        src[x + y*stride]= -i;
+                        dst[x + y*stride]= -i;
                      }
                  }else
-                    src[x + y*stride]= 0;
+                    dst[x + y*stride]= 0;
              }
          }
      }else{
@@ -3444,15 +3197,15 @@ static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int b
                      if(i>=0){
                          i<<= QEXPSHIFT;
                          i= (i + bias) / qmul; //FIXME optimize
-                        src[x + y*stride]=  i;
+                        dst[x + y*stride]=  i;
                      }else{
                          i= -i;
                          i<<= QEXPSHIFT;
                          i= (i + bias) / qmul; //FIXME optimize
-                        src[x + y*stride]= -i;
+                        dst[x + y*stride]= -i;
                      }
                  }else
-                    src[x + y*stride]= 0;
+                    dst[x + y*stride]= 0;
              }
          }
      }
@@ -3461,9 +3214,9 @@ static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int b
      }
  }
  
-static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
+static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
      const int w= b->width;
-    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
+    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
      const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
      const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
      int x,y;
@@ -3473,7 +3226,7 @@ static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand
  
      for(y=start_y; y<end_y; y++){
  //        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
-        DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
+        IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
          for(x=0; x<w; x++){
              int i= line[x];
              if(i<0){
@@ -3488,10 +3241,10 @@ static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand
      }
  }
  
-static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
+static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
      const int w= b->width;
      const int h= b->height;
-    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
+    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
      const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
      const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
      int x,y;
@@ -3514,7 +3267,7 @@ static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
      }
  }
  
-static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
+static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
      const int w= b->width;
      const int h= b->height;
      int x,y;
@@ -3538,14 +3291,14 @@ static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, in
      }
  }
  
-static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
+static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
      const int w= b->width;
      int x,y;
  
  //    START_TIMER
  
-    DWTELEM * line;
-    DWTELEM * prev;
+    IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
+    IDWTELEM * prev;
  
      if (start_y != 0)
          line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
@@ -3572,7 +3325,7 @@ static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand
  //    STOP_TIMER("correlate")
  }
  
-static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
+static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
      const int w= b->width;
      const int h= b->height;
      int x,y;
@@ -3603,8 +3356,14 @@ static void encode_header(SnowContext *s){
      memset(kstate, MID_STATE, sizeof(kstate));
  
      put_rac(&s->c, kstate, s->keyframe);
-    if(s->keyframe || s->always_reset)
+    if(s->keyframe || s->always_reset){
          reset_contexts(s);
+        s->last_spatial_decomposition_type=
+        s->last_qlog=
+        s->last_qbias=
+        s->last_mv_scale=
+        s->last_block_max_depth= 0;
+    }
      if(s->keyframe){
          put_symbol(&s->c, s->header_state, s->version, 0);
          put_rac(&s->c, s->header_state, s->always_reset);
@@ -3627,11 +3386,17 @@ static void encode_header(SnowContext *s){
              }
          }
      }
-    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
-    put_symbol(&s->c, s->header_state, s->qlog, 1);
-    put_symbol(&s->c, s->header_state, s->mv_scale, 0);
-    put_symbol(&s->c, s->header_state, s->qbias, 1);
-    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
+    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
+    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
+    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
+    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
+    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
+
+    s->last_spatial_decomposition_type= s->spatial_decomposition_type;
+    s->last_qlog                      = s->qlog;
+    s->last_qbias                     = s->qbias;
+    s->last_mv_scale                  = s->mv_scale;
+    s->last_block_max_depth           = s->block_max_depth;
  }
  
  static int decode_header(SnowContext *s){
@@ -3641,8 +3406,14 @@ static int decode_header(SnowContext *s){
      memset(kstate, MID_STATE, sizeof(kstate));
  
      s->keyframe= get_rac(&s->c, kstate);
-    if(s->keyframe || s->always_reset)
+    if(s->keyframe || s->always_reset){
          reset_contexts(s);
+        s->spatial_decomposition_type=
+        s->qlog=
+        s->qbias=
+        s->mv_scale=
+        s->block_max_depth= 0;
+    }
      if(s->keyframe){
          s->version= get_symbol(&s->c, s->header_state, 0);
          if(s->version>0){
@@ -3673,16 +3444,16 @@ static int decode_header(SnowContext *s){
          }
      }
  
-    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
-    if(s->spatial_decomposition_type > 2){
+    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
+    if(s->spatial_decomposition_type > 1 || s->spatial_decomposition_type < 0){ //signed delta above can drive it negative
          av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
          return -1;
      }
  
-    s->qlog= get_symbol(&s->c, s->header_state, 1);
-    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
-    s->qbias= get_symbol(&s->c, s->header_state, 1);
-    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
+    s->qlog           += get_symbol(&s->c, s->header_state, 1);
+    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
+    s->qbias          += get_symbol(&s->c, s->header_state, 1);
+    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
      if(s->block_max_depth > 1 || s->block_max_depth < 0){
          av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
          s->block_max_depth= 0;
@@ -3764,7 +3535,8 @@ static int common_init(AVCodecContext *avctx){
      width= s->avctx->width;
      height= s->avctx->height;
  
-    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
+    s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
+    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this doesnt belong here
  
      s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
      s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
@@ -3802,6 +3574,7 @@ static int common_init(AVCodecContext *avctx){
                      b->buf += b->stride>>1;
                      b->buf_y_offset = b->stride_line >> 1;
                  }
+                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
  
                  if(level)
                      b->parent= &s->plane[plane_index].band[level-1][orientation];
@@ -3844,21 +3617,23 @@ static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
      for(level=0; level<s->spatial_decomposition_count; level++){
          for(orientation=level ? 1 : 0; orientation<4; orientation++){
              SubBand *b= &s->plane[0].band[level][orientation];
-            DWTELEM *buf= b->buf;
+            IDWTELEM *buf= b->ibuf;
              const int w= b->width;
              const int h= b->height;
              const int stride= b->stride;
-            const int qlog= clip(2*QROOT + b->qlog, 0, QROOT*16);
+            const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
              const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
              const int qdiv= (1<<16)/qmul;
              int x, y;
+            //FIXME this is ugly
+            for(y=0; y<h; y++)
+                for(x=0; x<w; x++)
+                    buf[x+y*stride]= b->buf[x+y*stride];
              if(orientation==0)
                  decorrelate(s, b, buf, stride, 1, 0);
              for(y=0; y<h; y++)
                  for(x=0; x<w; x++)
                      coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
-            if(orientation==0)
-                correlate(s, b, buf, stride, 1, 0);
          }
      }
  
@@ -3875,6 +3650,8 @@ static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
      }
  
      pict->quality= ff_rate_estimate_qscale(&s->m, 1);
+    if (pict->quality < 0)
+        return INT_MIN;
      s->lambda= pict->quality * 3/2;
      delta_qlog= qscale2qlog(pict->quality) - s->qlog;
      s->qlog+= delta_qlog;
@@ -3889,15 +3666,15 @@ static void calculate_vissual_weight(SnowContext *s, Plane *p){
      for(level=0; level<s->spatial_decomposition_count; level++){
          for(orientation=level ? 1 : 0; orientation<4; orientation++){
              SubBand *b= &p->band[level][orientation];
-            DWTELEM *buf= b->buf;
+            IDWTELEM *ibuf= b->ibuf;
              int64_t error=0;
  
-            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
-            buf[b->width/2 + b->height/2*b->stride]= 256*256;
-            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
+            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
+            ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
+            ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
              for(y=0; y<height; y++){
                  for(x=0; x<width; x++){
-                    int64_t d= s->spatial_dwt_buffer[x + y*width];
+                    int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
                      error += d*d;
                  }
              }
@@ -3968,7 +3745,7 @@ static int encode_init(AVCodecContext *avctx)
  //    case PIX_FMT_YUV410P:
          s->colorspace_type= 0;
          break;
-/*    case PIX_FMT_RGBA32:
+/*    case PIX_FMT_RGB32:
          s->colorspace= 1;
          break;*/
      default:
@@ -4060,8 +3837,11 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
          s->m.pict_type =
          pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
          s->keyframe= pict->pict_type==FF_I_TYPE;
-        if(!(avctx->flags&CODEC_FLAG_QSCALE))
+        if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
              pict->quality= ff_rate_estimate_qscale(&s->m, 0);
+            if (pict->quality < 0)
+                return -1;
+        }
      }else{
          s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
          s->m.pict_type=
@@ -4151,10 +3931,10 @@ redo_frame:
       if(pict->data[plane_index]) //FIXME gray hack
          for(y=0; y<h; y++){
              for(x=0; x<w; x++){
-                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
+                s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
              }
          }
-        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
+        predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
  
          if(   plane_index==0
             && pict->pict_type == P_TYPE
@@ -4165,14 +3945,19 @@ redo_frame:
              pict->pict_type= FF_I_TYPE;
              s->keyframe=1;
              s->current_picture.key_frame=1;
-            reset_contexts(s);
              goto redo_frame;
          }
  
          if(s->qlog == LOSSLESS_QLOG){
              for(y=0; y<h; y++){
                  for(x=0; x<w; x++){
-                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
+                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
+                }
+            }
+        }else{
+            for(y=0; y<h; y++){
+                for(x=0; x<w; x++){
+                    s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
                  }
              }
          }
@@ -4181,6 +3966,8 @@ redo_frame:
  
          if(s->pass1_rc && plane_index==0){
              int delta_qlog = ratecontrol_1pass(s, pict);
+            if (delta_qlog <= INT_MIN)
+                return -1;
              if(delta_qlog){
                  //reordering qlog in the bitstream would eliminate this reset
                  ff_init_range_encoder(c, buf, buf_size);
@@ -4195,13 +3982,13 @@ redo_frame:
              for(orientation=level ? 1 : 0; orientation<4; orientation++){
                  SubBand *b= &p->band[level][orientation];
  
-                quantize(s, b, b->buf, b->stride, s->qbias);
+                quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
                  if(orientation==0)
-                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
-                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
+                    decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0);
+                encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
                  assert(b->parent==NULL || b->parent->stride == b->stride*2);
                  if(orientation==0)
-                    correlate(s, b, b->buf, b->stride, 1, 0);
+                    correlate(s, b, b->ibuf, b->stride, 1, 0);
              }
          }
  //        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
@@ -4210,20 +3997,20 @@ redo_frame:
              for(orientation=level ? 1 : 0; orientation<4; orientation++){
                  SubBand *b= &p->band[level][orientation];
  
-                dequantize(s, b, b->buf, b->stride);
+                dequantize(s, b, b->ibuf, b->stride);
              }
          }
  
-        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
+        ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
          if(s->qlog == LOSSLESS_QLOG){
              for(y=0; y<h; y++){
                  for(x=0; x<w; x++){
-                    s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
+                    s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
                  }
              }
          }
  {START_TIMER
-        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
+        predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
  STOP_TIMER("pred-conv")}
        }else{
              //ME/MC only
@@ -4235,8 +4022,8 @@ STOP_TIMER("pred-conv")}
                      }
                  }
              }else{
-                memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
-                predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
+                memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
+                predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
              }
        }
          if(s->avctx->flags&CODEC_FLAG_PSNR){
@@ -4267,10 +4054,15 @@ STOP_TIMER("pred-conv")}
      s->m.current_picture.quality = pict->quality;
      s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
      if(s->pass1_rc)
-        ff_rate_estimate_qscale(&s->m, 0);
+        if (ff_rate_estimate_qscale(&s->m, 0) < 0)
+            return -1;
      if(avctx->flags&CODEC_FLAG_PASS1)
          ff_write_pass1_stats(&s->m);
      s->m.last_pict_type = s->m.pict_type;
+    avctx->frame_bits = s->m.frame_bits;
+    avctx->mv_bits = s->m.mv_bits;
+    avctx->misc_bits = s->m.misc_bits;
+    avctx->p_tex_bits = s->m.p_tex_bits;
  
      emms_c();
  
@@ -4281,6 +4073,7 @@ static void common_end(SnowContext *s){
      int plane_index, level, orientation, i;
  
      av_freep(&s->spatial_dwt_buffer);
+    av_freep(&s->spatial_idwt_buffer);
  
      av_freep(&s->m.me.scratchpad);
      av_freep(&s->m.me.map);
@@ -4327,7 +4120,7 @@ static int decode_init(AVCodecContext *avctx)
      common_init(avctx);
  
      block_size = MB_SIZE >> s->block_max_depth;
-    slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
+    slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_idwt_buffer);
  
      return 0;
  }
@@ -4362,7 +4155,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
  
  if(s->avctx->debug&2048){
          memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
-        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
+        predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
  
          for(y=0; y<h; y++){
              for(x=0; x<w; x++){
@@ -4427,8 +4220,8 @@ if(s->avctx->debug&2048){
                          int correlate_end_y = FFMIN(b->height, end_y + 1);
                          int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
                          decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
-                        correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
-                        dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
+                        correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
+                        dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
                      }
                      else
                          decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
@@ -4447,14 +4240,14 @@ if(s->avctx->debug&2048){
  
          if(s->qlog == LOSSLESS_QLOG){
              for(; yq<slice_h && yq<h; yq++){
-                DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
+                IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
                  for(x=0; x<w; x++){
                      line[x] <<= FRAC_BITS;
                  }
              }
          }
  
-        predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
+        predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
  
          y = FFMIN(p->height, slice_starty);
          end_y = FFMIN(p->height, slice_h);
@@ -4509,7 +4302,7 @@ AVCodec snow_decoder = {
      NULL
  };
  
-#ifdef CONFIG_ENCODERS
+#ifdef CONFIG_SNOW_ENCODER
  AVCodec snow_encoder = {
      "snow",
      CODEC_TYPE_VIDEO,
@@ -4526,6 +4319,7 @@ AVCodec snow_encoder = {
  #undef malloc
  #undef free
  #undef printf
+#undef random
  
  int main(){
      int width=256;
@@ -4555,7 +4349,7 @@ int main(){
      ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
  
      for(i=0; i<width*height; i++)
-        if(ABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
+        if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
  
  #if 0
      printf("testing AC coder\n");
@@ -4565,7 +4359,7 @@ int main(){
  
      for(i=-256; i<256; i++){
  START_TIMER
-        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
+        put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
  STOP_TIMER("put_symbol")
      }
      ff_rac_terminate(&s.c);
@@ -4579,7 +4373,7 @@ STOP_TIMER("put_symbol")
  START_TIMER
          j= get_symbol(&s.c, s.header_state, 1);
  STOP_TIMER("get_symbol")
-        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
+        if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
      }
  #endif
  {
@@ -4608,9 +4402,9 @@ int64_t g=0;
                  for(x=0; x<width; x++){
                      int64_t d= buffer[0][x + y*width];
                      error += d*d;
-                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
+                    if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
                  }
-                if(ABS(height/2-y)<9 && level==2) printf("\n");
+                if(FFABS(height/2-y)<9 && level==2) printf("\n");
              }
              error= (int)(sqrt(error)+0.5);
              errors[level][orientation]= error;
@@ -4622,7 +4416,7 @@ int64_t g=0;
      for(level=0; level<s.spatial_decomposition_count; level++){
          printf("  {");
          for(orientation=0; orientation<4; orientation++){
-            printf("%8lld,", errors[level][orientation]/g);
+            printf("%8"PRId64",", errors[level][orientation]/g);
          }
          printf("},\n");
      }
@@ -4661,9 +4455,9 @@ int64_t g=0;
                  for(x=0; x<width; x++){
                      int64_t d= buffer[0][x + y*width];
                      error += d*d;
-                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
+                    if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
                  }
-                if(ABS(height/2-y)<9) printf("\n");
+                if(FFABS(height/2-y)<9) printf("\n");
              }
      }