h263 loop filter

[ffmpeg] / libavcodec / mpegvideo.c
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c

index 34e581123a2c615c6a016092e9e1f74bb758c6af..595ab16c2f53b838ddbb57cd304832becaf187ae 100644 (file)
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -28,6 +28,7 @@
  #include "avcodec.h"
  #include "dsputil.h"
  #include "mpegvideo.h"
+#include "faandct.h"
  
  #ifdef USE_FASTMEMCPY
  #include "fastmemcpy.h"
@@ -55,7 +56,7 @@ static int sse_mb(MpegEncContext *s);
  #ifdef HAVE_XVMC
  extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
  extern void XVMC_field_end(MpegEncContext *s);
-extern void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
+extern void XVMC_decode_mb(MpegEncContext *s);
  #endif
  
  void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
@@ -87,22 +88,31 @@ static const uint8_t h263_chroma_roundtab[16] = {
      0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
  };
  
+static const uint8_t ff_default_chroma_qscale_table[32]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+};
+
  #ifdef CONFIG_ENCODERS
  static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
  static uint8_t default_fcode_tab[MAX_MV*2+1];
  
  enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
  
-static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
+static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                             const uint16_t *quant_matrix, int bias, int qmin, int qmax)
  {
      int qscale;
  
      for(qscale=qmin; qscale<=qmax; qscale++){
          int i;
-        if (s->dsp.fdct == ff_jpeg_fdct_islow) {
+        if (dsp->fdct == ff_jpeg_fdct_islow 
+#ifdef FAAN_POSTSCALE
+            || dsp->fdct == ff_faandct
+#endif
+            ) {
              for(i=0;i<64;i++) {
-                const int j= s->dsp.idct_permutation[i];
+                const int j= dsp->idct_permutation[i];
                  /* 16 <= qscale * quant_matrix[i] <= 7905 */
                  /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                  /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
@@ -111,9 +121,13 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
                  qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
                                  (qscale * quant_matrix[j]));
              }
-        } else if (s->dsp.fdct == fdct_ifast) {
+        } else if (dsp->fdct == fdct_ifast
+#ifndef FAAN_POSTSCALE
+                   || dsp->fdct == ff_faandct
+#endif
+                   ) {
              for(i=0;i<64;i++) {
-                const int j= s->dsp.idct_permutation[i];
+                const int j= dsp->idct_permutation[i];
                  /* 16 <= qscale * quant_matrix[i] <= 7905 */
                  /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                  /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
@@ -124,7 +138,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
              }
          } else {
              for(i=0;i<64;i++) {
-                const int j= s->dsp.idct_permutation[i];
+                const int j= dsp->idct_permutation[i];
                  /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                     So 16           <= qscale * quant_matrix[i]             <= 7905
                     so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
@@ -132,10 +146,10 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
                  */
                  qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
  //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
-                qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
+                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
  
-                if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
-                qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
+                if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
+                qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
              }
          }
      }
@@ -265,17 +279,17 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
          r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
          
          if(r<0 || !pic->age || !pic->type || !pic->data[0]){
-            fprintf(stderr, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
+           av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
              return -1;
          }
  
          if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
-            fprintf(stderr, "get_buffer() failed (stride changed)\n");
+            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
              return -1;
          }
  
          if(pic->linesize[1] != pic->linesize[2]){
-            fprintf(stderr, "get_buffer() failed (uv stride missmatch)\n");
+            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride missmatch)\n");
              return -1;
          }
  
@@ -288,7 +302,6 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
              CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
              CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
              CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
-            CHECKED_ALLOCZ(pic->mb_cmp_score, mb_array_size * sizeof(int32_t))
          }
  
          CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
@@ -302,6 +315,7 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
              }
          }
          pic->qstride= s->mb_stride;
+        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
      }
  
      //it might be nicer if the application would keep track of these but it would require a API change
@@ -328,10 +342,10 @@ static void free_picture(MpegEncContext *s, Picture *pic){
      av_freep(&pic->mb_var);
      av_freep(&pic->mc_mb_var);
      av_freep(&pic->mb_mean);
-    av_freep(&pic->mb_cmp_score);
      av_freep(&pic->mbskip_table);
      av_freep(&pic->qscale_table);
      av_freep(&pic->mb_type_base);
+    av_freep(&pic->pan_scan);
      pic->mb_type= NULL;
      for(i=0; i<2; i++){
          av_freep(&pic->motion_val[i]);
@@ -375,6 +389,10 @@ int MPV_common_init(MpegEncContext *s)
      s->block_wrap[4]=
      s->block_wrap[5]= s->mb_width + 2;
  
+    s->y_dc_scale_table=
+    s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+    s->chroma_qscale_table= ff_default_chroma_qscale_table;    
+    
      y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
      c_size = (s->mb_width + 2) * (s->mb_height + 2);
      yc_size = y_size + 2 * c_size;
@@ -440,11 +458,26 @@ int MPV_common_init(MpegEncContext *s)
          CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint8_t)) //needed for encoding
          
          CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
+        
+        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
+        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
+        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
+        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
+        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
+        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
+        
+        if(s->avctx->noise_reduction){
+            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
+            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
+        }
      }
+    CHECKED_ALLOCZ(s->blocks, 64*6*2 * sizeof(DCTELEM))
          
+    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
+
      CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
      
-    if (s->out_format == FMT_H263 || s->encoding) {
+    if (s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_VIS_MV)) {
          int size;
  
          /* MV prediction */
@@ -498,6 +531,10 @@ int MPV_common_init(MpegEncContext *s)
      
      s->block= s->blocks[0];
  
+    for(i=0;i<12;i++){
+        s->pblocks[i] = (short *)(&s->block[i]);
+    }
+
      s->parse_context.state= -1;
  
      s->context_initialized = 1;
@@ -556,10 +593,22 @@ void MPV_common_end(MpegEncContext *s)
      av_freep(&s->error_status_table);
      av_freep(&s->mb_index2xy);
      av_freep(&s->lambda_table);
-
-    for(i=0; i<MAX_PICTURE_COUNT; i++){
-        free_picture(s, &s->picture[i]);
+    av_freep(&s->q_intra_matrix);
+    av_freep(&s->q_inter_matrix);
+    av_freep(&s->q_intra_matrix16);
+    av_freep(&s->q_inter_matrix16);
+    av_freep(&s->blocks);
+    av_freep(&s->input_picture);
+    av_freep(&s->reordered_input_picture);
+    av_freep(&s->dct_error_sum);
+    av_freep(&s->dct_offset);
+
+    if(s->picture){
+        for(i=0; i<MAX_PICTURE_COUNT; i++){
+            free_picture(s, &s->picture[i]);
+        }
      }
+    av_freep(&s->picture);
      avcodec_default_free_buffers(s->avctx);
      s->context_initialized = 0;
      s->last_picture_ptr=
@@ -583,7 +632,7 @@ int MPV_encode_init(AVCodecContext *avctx)
      s->width = avctx->width;
      s->height = avctx->height;
      if(avctx->gop_size > 600){
-        fprintf(stderr, "Warning keyframe interval too large! reducing it ...\n");
+       av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
          avctx->gop_size=600;
      }
      s->gop_size = avctx->gop_size;
@@ -622,41 +671,61 @@ int MPV_encode_init(AVCodecContext *avctx)
                          || s->avctx->dark_masking
                          || s->avctx->temporal_cplx_masking 
                          || s->avctx->spatial_cplx_masking
-                        || s->avctx->p_masking)
+                        || s->avctx->p_masking
+                        || (s->flags&CODEC_FLAG_QP_RD))
                         && !s->fixed_qscale;
      
      s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
+    
+    s->obmc= (s->flags & CODEC_FLAG_OBMC);
+    s->loop_filter= (s->flags & CODEC_FLAG_LOOP_FILTER);
  
-    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4){
-        fprintf(stderr, "4MV not supporetd by codec\n");
+    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
+       && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
+        av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
+        return -1;
+    }
+    
+    if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
+        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decission\n");
+        return -1;
+    }
+    
+    if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
+        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
          return -1;
      }
      
      if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
-        fprintf(stderr, "qpel not supporetd by codec\n");
+        av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
          return -1;
      }
  
      if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
-        fprintf(stderr, "data partitioning not supporetd by codec\n");
+        av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
          return -1;
      }
      
      if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
-        fprintf(stderr, "b frames not supporetd by codec\n");
+        av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
          return -1;
      }
      
      if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
-        fprintf(stderr, "mpeg2 style quantization not supporetd by codec\n");
+        av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supporetd by codec\n");
          return -1;
      }
          
      if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
-        fprintf(stderr, "CBP RD needs trellis quant\n");
+        av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
          return -1;
      }
  
+    if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
+        av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
+        return -1;
+    }
+    
      if(s->codec_id==CODEC_ID_MJPEG){
          s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
          s->inter_quant_bias= 0;
@@ -711,10 +780,11 @@ int MPV_encode_init(AVCodecContext *avctx)
  #ifdef CONFIG_RISKY
      case CODEC_ID_H263:
          if (h263_get_picture_format(s->width, s->height) == 7) {
-            printf("Input picture size isn't suitable for h263 codec! try h263+\n");
+            av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
              return -1;
          }
          s->out_format = FMT_H263;
+       s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
          avctx->delay=0;
          s->low_delay=1;
          break;
@@ -722,11 +792,18 @@ int MPV_encode_init(AVCodecContext *avctx)
          s->out_format = FMT_H263;
          s->h263_plus = 1;
         /* Fx */
-       s->unrestricted_mv=(avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
+        s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
+       s->modified_quant= s->h263_aic;
+       s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
+       s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
+       s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
+       s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
+        if(s->modified_quant)
+            s->chroma_qscale_table= ff_h263_chroma_qscale_table;
+
         /* /Fx */
          /* These are just to be sure */
-        s->umvplus = 1;
          avctx->delay=0;
          s->low_delay=1;
          break;
@@ -740,7 +817,6 @@ int MPV_encode_init(AVCodecContext *avctx)
          break;
      case CODEC_ID_RV10:
          s->out_format = FMT_H263;
-        s->h263_rv10 = 1;
          avctx->delay=0;
          s->low_delay=1;
          break;
@@ -821,8 +897,6 @@ int MPV_encode_init(AVCodecContext *avctx)
      }
      s->me.mv_penalty= default_mv_penalty;
      s->fcode_tab= default_fcode_tab;
-    s->y_dc_scale_table=
-    s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
   
      /* dont use mv_penalty table for crap MV as it would be confused */
      //FIXME remove after fixing / removing old ME
@@ -872,9 +946,9 @@ int MPV_encode_init(AVCodecContext *avctx)
      /* precompute matrix */
      /* for mjpeg, we do include qscale in the matrix */
      if (s->out_format != FMT_MJPEG) {
-        convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, 
+        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
                         s->intra_matrix, s->intra_quant_bias, 1, 31);
-        convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, 
+        convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16, 
                         s->inter_matrix, s->inter_quant_bias, 1, 31);
      }
  
@@ -979,32 +1053,50 @@ static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
      }
  }
  
-static int find_unused_picture(MpegEncContext *s, int shared){
+int ff_find_unused_picture(MpegEncContext *s, int shared){
      int i;
      
      if(shared){
          for(i=0; i<MAX_PICTURE_COUNT; i++){
-            if(s->picture[i].data[0]==NULL && s->picture[i].type==0) break;
+            if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
          }
      }else{
          for(i=0; i<MAX_PICTURE_COUNT; i++){
-            if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break; //FIXME
+            if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
          }
          for(i=0; i<MAX_PICTURE_COUNT; i++){
-            if(s->picture[i].data[0]==NULL) break;
+            if(s->picture[i].data[0]==NULL) return i;
          }
      }
  
-    assert(i<MAX_PICTURE_COUNT);
-    return i;
+    assert(0);
+    return -1;
  }
  
-/* generic function for encode/decode called before a frame is coded/decoded */
+static void update_noise_reduction(MpegEncContext *s){
+    int intra, i;
+
+    for(intra=0; intra<2; intra++){
+        if(s->dct_count[intra] > (1<<16)){
+            for(i=0; i<64; i++){
+                s->dct_error_sum[intra][i] >>=1;
+            }
+            s->dct_count[intra] >>= 1;
+        }
+        
+        for(i=0; i<64; i++){
+            s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
+        }
+    }
+}
+
+/**
+ * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
+ */
  int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
  {
      int i;
      AVFrame *pic;
-
      s->mb_skiped = 0;
  
      assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
@@ -1018,7 +1110,7 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
          if(!s->encoding){
              for(i=0; i<MAX_PICTURE_COUNT; i++){
                  if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
-                    fprintf(stderr, "releasing zombie picture\n");
+                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                      avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
                  }
              }
@@ -1033,18 +1125,22 @@ alloc:
              }
          }
  
-        i= find_unused_picture(s, 0);
-    
-        pic= (AVFrame*)&s->picture[i];
+        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
+            pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
+        else{
+            i= ff_find_unused_picture(s, 0);
+            pic= (AVFrame*)&s->picture[i];
+        }
+
          pic->reference= s->pict_type != B_TYPE ? 3 : 0;
  
-        if(s->current_picture_ptr)
+        if(s->current_picture_ptr) //FIXME broken, we need a coded_picture_number in MpegEncContext
              pic->coded_picture_number= s->current_picture_ptr->coded_picture_number+1;
          
          if( alloc_picture(s, (Picture*)pic, 0) < 0)
              return -1;
  
-        s->current_picture_ptr= &s->picture[i];
+        s->current_picture_ptr= (Picture*)pic;
      }
  
      s->current_picture_ptr->pict_type= s->pict_type;
@@ -1064,7 +1160,7 @@ alloc:
      if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
      
      if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
-        fprintf(stderr, "warning: first frame is no keyframe\n");
+        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
          assert(s->pict_type != B_TYPE); //these should have been dropped if we dont have a reference
          goto alloc;
      }
@@ -1096,6 +1192,12 @@ alloc:
      else 
          s->dct_unquantize = s->dct_unquantize_mpeg1;
  
+    if(s->dct_error_sum){
+        assert(s->avctx->noise_reduction && s->encoding);
+
+        update_noise_reduction(s);
+    }
+        
  #ifdef HAVE_XVMC
      if(s->avctx->xvmc_acceleration)
          return XVMC_field_start(s, avctx);
@@ -1114,12 +1216,10 @@ void MPV_frame_end(MpegEncContext *s)
          XVMC_field_end(s);
      }else
  #endif
-    if(s->codec_id!=CODEC_ID_SVQ1 && s->out_format != FMT_MPEG1){
-        if (s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
+    if(s->unrestricted_mv && s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
              draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
              draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
              draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
-        }
      }
      emms_c();
      
@@ -1247,63 +1347,63 @@ void ff_print_debug_info(MpegEncContext *s, Picture *pict){
                  if(s->avctx->debug&FF_DEBUG_SKIP){
                      int count= s->mbskip_table[x + y*s->mb_stride];
                      if(count>9) count=9;
-                    printf("%1d", count);
+                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                  }
                  if(s->avctx->debug&FF_DEBUG_QP){
-                    printf("%2d", pict->qscale_table[x + y*s->mb_stride]);
+                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                  }
                  if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                      int mb_type= pict->mb_type[x + y*s->mb_stride];
                      
                      //Type & MV direction
                      if(IS_PCM(mb_type))
-                        printf("P");
+                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                      else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
-                        printf("A");
+                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                      else if(IS_INTRA4x4(mb_type))
-                        printf("i");
+                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                      else if(IS_INTRA16x16(mb_type))
-                        printf("I");
+                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                      else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
-                        printf("d");
+                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                      else if(IS_DIRECT(mb_type))
-                        printf("D");
+                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                      else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
-                        printf("g");
+                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                      else if(IS_GMC(mb_type))
-                        printf("G");
+                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                      else if(IS_SKIP(mb_type))
-                        printf("S");
+                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                      else if(!USES_LIST(mb_type, 1))
-                        printf(">");
+                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                      else if(!USES_LIST(mb_type, 0))
-                        printf("<");
+                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                      else{
                          assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
-                        printf("X");
+                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                      }
                      
                      //segmentation
                      if(IS_8X8(mb_type))
-                        printf("+");
+                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                      else if(IS_16X8(mb_type))
-                        printf("-");
+                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                      else if(IS_8X16(mb_type))
-                        printf("¦");
+                        av_log(s->avctx, AV_LOG_DEBUG, "¦");
                      else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
-                        printf(" ");
+                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                      else
-                        printf("?");
+                        av_log(s->avctx, AV_LOG_DEBUG, "?");
                      
                          
                      if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
-                        printf("=");
+                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                      else
-                        printf(" ");
+                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                  }
-//                printf(" ");
+//                av_log(s->avctx, AV_LOG_DEBUG, " ");
              }
-            printf("\n");
+            av_log(s->avctx, AV_LOG_DEBUG, "\n");
          }
      }
      
@@ -1327,6 +1427,16 @@ void ff_print_debug_info(MpegEncContext *s, Picture *pict){
                          int my= (s->motion_val[xy][1]>>shift) + sy;
                          draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
                      }
+                }else if(IS_16X8(s->current_picture.mb_type[mb_index])){
+                    int i;
+                    for(i=0; i<2; i++){
+                        int sx=mb_x*16 + 8;
+                        int sy=mb_y*16 + 4 + 8*i;
+                        int xy=1 + mb_x*2 + (mb_y*2 + 1 + i)*(s->mb_width*2 + 2);
+                        int mx=(s->motion_val[xy][0]>>shift) + sx;
+                        int my=(s->motion_val[xy][1]>>shift) + sy;
+                        draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
+                    }
                  }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
@@ -1389,10 +1499,10 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
      if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
      if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
    
-//    printf("%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
+//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
      
      if(direct){
-        i= find_unused_picture(s, 1);
+        i= ff_find_unused_picture(s, 1);
  
          pic= (AVFrame*)&s->picture[i];
          pic->reference= 3;
@@ -1404,7 +1514,7 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
          alloc_picture(s, (Picture*)pic, 1);
      }else{
          int offset= 16;
-        i= find_unused_picture(s, 0);
+        i= ff_find_unused_picture(s, 0);
  
          pic= (AVFrame*)&s->picture[i];
          pic->reference= 3;
@@ -1496,7 +1606,7 @@ static void select_input_picture(MpegEncContext *s){
                  }
              
                  if(b_frames > s->max_b_frames){
-                    fprintf(stderr, "warning, too many bframes in a row\n");
+                    av_log(s->avctx, AV_LOG_ERROR, "warning, too many bframes in a row\n");
                      b_frames = s->max_b_frames;
                  }
              }else if(s->b_frame_strategy==0){
@@ -1521,14 +1631,14 @@ static void select_input_picture(MpegEncContext *s){
                      s->input_picture[i]->b_frame_score=0;
                  }
              }else{
-                fprintf(stderr, "illegal b frame strategy\n");
+                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
                  b_frames=0;
              }
  
              emms_c();
  //static int b_count=0;
  //b_count+= b_frames;
-//printf("b_frames: %d\n", b_count);
+//av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
                          
              s->reordered_input_picture[0]= s->input_picture[b_frames];
              if(   s->picture_in_gop_number + b_frames >= s->gop_size 
@@ -1554,7 +1664,7 @@ static void select_input_picture(MpegEncContext *s){
          if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
              // input is a shared pix, so we cant modifiy it -> alloc a new one & ensure that the shared one is reuseable
          
-            int i= find_unused_picture(s, 0);
+            int i= ff_find_unused_picture(s, 0);
              Picture *pic= &s->picture[i];
  
              /* mark us unused / free shared pic */
@@ -1600,7 +1710,7 @@ int MPV_encode_picture(AVCodecContext *avctx,
      int i;
  
      if(avctx->pix_fmt != PIX_FMT_YUV420P){
-        fprintf(stderr, "this codec supports only YUV420P\n");
+        av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
          return -1;
      }
      
@@ -1688,8 +1798,8 @@ static inline void gmc1_motion(MpegEncContext *s,
  
      dest_y+=dest_offset;
      if(s->flags&CODEC_FLAG_EMU_EDGE){
-        if(src_x<0 || src_y<0 || src_x + 17 >= s->h_edge_pos
-                              || src_y + 17 >= s->v_edge_pos){
+        if(   (unsigned)src_x >= s->h_edge_pos - 17
+           || (unsigned)src_y >= s->v_edge_pos - 17){
              ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
              ptr= s->edge_emu_buffer;
          }
@@ -1727,8 +1837,8 @@ static inline void gmc1_motion(MpegEncContext *s,
      offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
      ptr = ref_picture[1] + offset;
      if(s->flags&CODEC_FLAG_EMU_EDGE){
-        if(src_x<0 || src_y<0 || src_x + 9 >= s->h_edge_pos>>1
-                              || src_y + 9 >= s->v_edge_pos>>1){
+        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
+           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
              ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
              ptr= s->edge_emu_buffer;
              emu=1;
@@ -1880,6 +1990,42 @@ void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w,
      }
  }
  
+static inline int hpel_motion(MpegEncContext *s, 
+                                  uint8_t *dest, uint8_t *src, 
+                                  int src_x, int src_y,
+                                  int width, int height, int stride,
+                                  int h_edge_pos, int v_edge_pos,
+                                  int w, int h, op_pixels_func *pix_op,
+                                  int motion_x, int motion_y)
+{
+    int dxy;
+    int emu=0;
+
+    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
+    src_x += motion_x >> 1;
+    src_y += motion_y >> 1;
+                
+    /* WARNING: do no forget half pels */
+    src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
+    if (src_x == width)
+        dxy &= ~1;
+    src_y = clip(src_y, -16, height);
+    if (src_y == height)
+        dxy &= ~2;
+    src += src_y * stride + src_x;
+
+    if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
+        if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
+           || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
+            ff_emulated_edge_mc(s->edge_emu_buffer, src, stride, w+1, h+1,
+                             src_x, src_y, h_edge_pos, v_edge_pos);
+            src= s->edge_emu_buffer;
+            emu=1;
+        }
+    }
+    pix_op[dxy](dest, src, stride, h);
+    return emu;
+}
  
  /* apply one mpeg motion vector to the three components */
  static inline void mpeg_motion(MpegEncContext *s,
@@ -1890,7 +2036,7 @@ static inline void mpeg_motion(MpegEncContext *s,
                                 int motion_x, int motion_y, int h)
  {
      uint8_t *ptr;
-    int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
+    int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, uvlinesize;
      int emu=0;
  #if 0    
  if(s->quarter_sample)
@@ -1899,34 +2045,19 @@ if(s->quarter_sample)
      motion_y>>=1;
  }
  #endif
-    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
-    src_x = s->mb_x * 16 + (motion_x >> 1);
-    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
-                
-    /* WARNING: do no forget half pels */
+
      height = s->height >> field_based;
      v_edge_pos = s->v_edge_pos >> field_based;
-    src_x = clip(src_x, -16, s->width);
-    if (src_x == s->width)
-        dxy &= ~1;
-    src_y = clip(src_y, -16, height);
-    if (src_y == height)
-        dxy &= ~2;
-    linesize   = s->current_picture.linesize[0] << field_based;
      uvlinesize = s->current_picture.linesize[1] << field_based;
-    ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
-    dest_y += dest_offset;
  
-    if(s->flags&CODEC_FLAG_EMU_EDGE){
-        if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
-                              || src_y + (motion_y&1) + h  > v_edge_pos){
-            ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based,  //FIXME linesize? and uv below
-                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
-            ptr= s->edge_emu_buffer + src_offset;
-            emu=1;
-        }
-    }
-    pix_op[0][dxy](dest_y, ptr, linesize, h);
+    emu= hpel_motion(s, 
+                dest_y + dest_offset, ref_picture[0] + src_offset,
+                s->mb_x * 16, s->mb_y * (16 >> field_based),
+                s->width, height, s->current_picture.linesize[0] << field_based,
+                s->h_edge_pos, v_edge_pos,
+                16, h, pix_op[0],
+                motion_x, motion_y);
+
  
      if(s->flags&CODEC_FLAG_GRAY) return;
  
@@ -1971,6 +2102,87 @@ if(s->quarter_sample)
      }
      pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
  }
+//FIXME move to dsputil, avg variant, 16x16 version
+static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
+    int x;
+    uint8_t * const top   = src[1];
+    uint8_t * const left  = src[2];
+    uint8_t * const mid   = src[0];
+    uint8_t * const right = src[3];
+    uint8_t * const bottom= src[4];
+#define OBMC_FILTER(x, t, l, m, r, b)\
+    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
+#define OBMC_FILTER4(x, t, l, m, r, b)\
+    OBMC_FILTER(x         , t, l, m, r, b);\
+    OBMC_FILTER(x+1       , t, l, m, r, b);\
+    OBMC_FILTER(x  +stride, t, l, m, r, b);\
+    OBMC_FILTER(x+1+stride, t, l, m, r, b);
+    
+    x=0;
+    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
+    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
+    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
+    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
+    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
+    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
+    x+= stride;
+    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
+    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
+    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
+    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
+    x+= stride;
+    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
+    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
+    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
+    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
+    x+= 2*stride;
+    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
+    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
+    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
+    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
+    x+= 2*stride;
+    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
+    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
+    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
+    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
+    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
+    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
+    x+= stride;
+    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
+    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
+    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
+    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
+}
+
+/* obmc for 1 8x8 luma block */
+static inline void obmc_motion(MpegEncContext *s,
+                               uint8_t *dest, uint8_t *src,
+                               int src_x, int src_y,
+                               op_pixels_func *pix_op,
+                               int16_t mv[5][2]/* mid top left right bottom*/)
+#define MID    0
+{
+    int i;
+    uint8_t *ptr[5];
+    
+    assert(s->quarter_sample==0);
+    
+    for(i=0; i<5; i++){
+        if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
+            ptr[i]= ptr[MID];
+        }else{
+            ptr[i]= s->edge_emu_buffer + 16 + 8*(i&1) + s->linesize*8*(i>>1);
+            hpel_motion(s, ptr[i], src,
+                        src_x, src_y,
+                        s->width, s->height, s->linesize,
+                        s->h_edge_pos, s->v_edge_pos,
+                        8, 8, pix_op,
+                        mv[i][0], mv[i][1]);
+        }
+    }
+
+    put_obmc(dest, ptr, s->linesize);                
+}
  
  static inline void qpel_motion(MpegEncContext *s,
                                 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
@@ -2003,8 +2215,8 @@ static inline void qpel_motion(MpegEncContext *s,
  //printf("%d %d %d\n", src_x, src_y, dxy);
      
      if(s->flags&CODEC_FLAG_EMU_EDGE){
-        if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
-                              || src_y + (motion_y&3) + h  > v_edge_pos){
+        if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16 
+           || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
              ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based, 
                               src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
              ptr= s->edge_emu_buffer + src_offset;
@@ -2079,6 +2291,55 @@ inline int ff_h263_round_chroma(int x){
      }
  }
  
+/**
+ * h263 chorma 4mv motion compensation.
+ */
+static inline void chroma_4mv_motion(MpegEncContext *s,
+                                     uint8_t *dest_cb, uint8_t *dest_cr,
+                                     uint8_t **ref_picture,
+                                     op_pixels_func *pix_op,
+                                     int mx, int my){
+    int dxy, emu=0, src_x, src_y, offset;
+    uint8_t *ptr;
+    
+    /* In case of 8X8, we construct a single chroma motion vector
+       with a special rounding */
+    mx= ff_h263_round_chroma(mx);
+    my= ff_h263_round_chroma(my);
+    
+    dxy = ((my & 1) << 1) | (mx & 1);
+    mx >>= 1;
+    my >>= 1;
+
+    src_x = s->mb_x * 8 + mx;
+    src_y = s->mb_y * 8 + my;
+    src_x = clip(src_x, -8, s->width/2);
+    if (src_x == s->width/2)
+        dxy &= ~1;
+    src_y = clip(src_y, -8, s->height/2);
+    if (src_y == s->height/2)
+        dxy &= ~2;
+    
+    offset = (src_y * (s->uvlinesize)) + src_x;
+    ptr = ref_picture[1] + offset;
+    if(s->flags&CODEC_FLAG_EMU_EDGE){
+        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
+           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+            ptr= s->edge_emu_buffer;
+            emu=1;
+        }
+    }
+    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
+
+    ptr = ref_picture[2] + offset;
+    if(emu){
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+        ptr= s->edge_emu_buffer;
+    }
+    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
+}
+
  /**
   * motion compesation of a single macroblock
   * @param s context
@@ -2096,14 +2357,74 @@ static inline void MPV_motion(MpegEncContext *s,
                                int dir, uint8_t **ref_picture, 
                                op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
  {
-    int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
+    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
      int mb_x, mb_y, i;
      uint8_t *ptr, *dest;
-    int emu=0;
  
      mb_x = s->mb_x;
      mb_y = s->mb_y;
  
+    if(s->obmc){
+        int16_t mv_cache[4][4][2];
+        const int xy= s->mb_x + s->mb_y*s->mb_stride;
+        const int mot_stride= s->mb_width*2 + 2;
+        const int mot_xy= 1 + mb_x*2 + (mb_y*2 + 1)*mot_stride;
+
+        assert(!s->mb_skiped);
+                
+        memcpy(mv_cache[1][1], s->motion_val[mot_xy           ], sizeof(int16_t)*4);
+        memcpy(mv_cache[2][1], s->motion_val[mot_xy+mot_stride], sizeof(int16_t)*4);
+        memcpy(mv_cache[3][1], s->motion_val[mot_xy+mot_stride], sizeof(int16_t)*4);
+
+        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
+            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
+        }else{
+            memcpy(mv_cache[0][1], s->motion_val[mot_xy-mot_stride], sizeof(int16_t)*4);
+        }
+
+        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
+            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
+            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
+        }else{
+            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->motion_val[mot_xy-1];
+            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->motion_val[mot_xy-1+mot_stride];
+        }
+
+        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
+            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
+            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
+        }else{
+            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->motion_val[mot_xy+2];
+            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->motion_val[mot_xy+2+mot_stride];
+        }
+        
+        mx = 0;
+        my = 0;
+        for(i=0;i<4;i++) {
+            const int x= (i&1)+1;
+            const int y= (i>>1)+1;
+            int16_t mv[5][2]= {
+                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
+                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
+                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
+                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
+                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
+            //FIXME cleanup
+            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
+                        ref_picture[0],
+                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
+                        pix_op[1],
+                        mv);
+
+            mx += mv[0][0];
+            my += mv[0][1];
+        }
+        if(!(s->flags&CODEC_FLAG_GRAY))
+            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
+
+        return;
+    }
+   
      switch(s->mv_type) {
      case MV_TYPE_16X16:
  #ifdef CONFIG_RISKY
@@ -2155,8 +2476,8 @@ static inline void MPV_motion(MpegEncContext *s,
                      
                  ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                  if(s->flags&CODEC_FLAG_EMU_EDGE){
-                    if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
-                                          || src_y + (motion_y&3) + 8 > s->v_edge_pos){
+                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8 
+                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                          ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                          ptr= s->edge_emu_buffer;
                      }
@@ -2169,73 +2490,21 @@ static inline void MPV_motion(MpegEncContext *s,
              }
          }else{
              for(i=0;i<4;i++) {
-                motion_x = s->mv[dir][i][0];
-                motion_y = s->mv[dir][i][1];
-
-                dxy = ((motion_y & 1) << 1) | (motion_x & 1);
-                src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
-                src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
-                    
-                /* WARNING: do no forget half pels */
-                src_x = clip(src_x, -16, s->width);
-                if (src_x == s->width)
-                    dxy &= ~1;
-                src_y = clip(src_y, -16, s->height);
-                if (src_y == s->height)
-                    dxy &= ~2;
-                    
-                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
-                if(s->flags&CODEC_FLAG_EMU_EDGE){
-                    if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
-                                          || src_y + (motion_y&1) + 8 > s->v_edge_pos){
-                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
-                        ptr= s->edge_emu_buffer;
-                    }
-                }
-                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
-                pix_op[1][dxy](dest, ptr, s->linesize, 8);
+                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
+                            ref_picture[0],
+                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
+                            s->width, s->height, s->linesize,
+                            s->h_edge_pos, s->v_edge_pos,
+                            8, 8, pix_op[1],
+                            s->mv[dir][i][0], s->mv[dir][i][1]);
  
                  mx += s->mv[dir][i][0];
                  my += s->mv[dir][i][1];
              }
          }
  
-        if(s->flags&CODEC_FLAG_GRAY) break;
-        /* In case of 8X8, we construct a single chroma motion vector
-           with a special rounding */
-        mx= ff_h263_round_chroma(mx);
-        my= ff_h263_round_chroma(my);
-        dxy = ((my & 1) << 1) | (mx & 1);
-        mx >>= 1;
-        my >>= 1;
-
-        src_x = mb_x * 8 + mx;
-        src_y = mb_y * 8 + my;
-        src_x = clip(src_x, -8, s->width/2);
-        if (src_x == s->width/2)
-            dxy &= ~1;
-        src_y = clip(src_y, -8, s->height/2);
-        if (src_y == s->height/2)
-            dxy &= ~2;
-        
-        offset = (src_y * (s->uvlinesize)) + src_x;
-        ptr = ref_picture[1] + offset;
-        if(s->flags&CODEC_FLAG_EMU_EDGE){
-                if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
-                                      || src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
-                    ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
-                    ptr= s->edge_emu_buffer;
-                    emu=1;
-                }
-            }
-        pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);
-
-        ptr = ref_picture[2] + offset;
-        if(emu){
-            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
-            ptr= s->edge_emu_buffer;
-        }
-        pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
+        if(!(s->flags&CODEC_FLAG_GRAY))
+            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
          break;
      case MV_TYPE_FIELD:
          if (s->picture_structure == PICT_FRAME) {
@@ -2375,7 +2644,7 @@ static inline void MPV_motion(MpegEncContext *s,
          }
      }
      break;
-
+    default: assert(0);
      }
  }
  
@@ -2398,10 +2667,10 @@ static inline void add_dct(MpegEncContext *s,
  }
  
  static inline void add_dequant_dct(MpegEncContext *s, 
-                           DCTELEM *block, int i, uint8_t *dest, int line_size)
+                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
  {
      if (s->block_last_index[i] >= 0) {
-        s->dct_unquantize(s, block, i, s->qscale);
+        s->dct_unquantize(s, block, i, qscale);
  
          s->dsp.idct_add (dest, line_size, block);
      }
@@ -2456,7 +2725,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
      const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
  #ifdef HAVE_XVMC
      if(s->avctx->xvmc_acceleration){
-        XVMC_decode_mb(s,block);
+        XVMC_decode_mb(s);//xvmc uses pblocks
          return;
      }
  #endif
@@ -2487,6 +2756,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
          qpel_mc_func (*op_qpix)[16];
          const int linesize= s->current_picture.linesize[0]; //not s->linesize as this woulnd be wrong for field pics
          const int uvlinesize= s->current_picture.linesize[1];
+        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band;
  
          /* avoid copy if macroblock skipped in last frame too */
          /* skip only during decoding as we might trash the buffers during encoding a bit */
@@ -2522,11 +2792,15 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
              dct_linesize = linesize;
              dct_offset = linesize * 8;
          }
-        
-        dest_y=  s->dest[0];
-        dest_cb= s->dest[1];
-        dest_cr= s->dest[2];
-
+        if(readable){
+            dest_y=  s->dest[0];
+            dest_cb= s->dest[1];
+            dest_cr= s->dest[2];
+        }else{
+            dest_y = s->edge_emu_buffer+32; //FIXME cleanup scratchpad pointers
+            dest_cb= s->edge_emu_buffer+48;
+            dest_cr= s->edge_emu_buffer+56;
+        }
          if (!s->mb_intra) {
              /* motion handling */
              /* decoding or more than one mb_type (MC was allready done otherwise) */
@@ -2555,14 +2829,14 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
              /* add dct residue */
              if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                  || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
-                add_dequant_dct(s, block[0], 0, dest_y, dct_linesize);
-                add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize);
-                add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
-                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
+                add_dequant_dct(s, block[0], 0, dest_y, dct_linesize, s->qscale);
+                add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize, s->qscale);
+                add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize, s->qscale);
+                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize, s->qscale);
  
                  if(!(s->flags&CODEC_FLAG_GRAY)){
-                    add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize);
-                    add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize);
+                    add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
+                    add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                  }
              } else if(s->codec_id != CODEC_ID_WMV2){
                  add_dct(s, block[0], 0, dest_y, dct_linesize);
@@ -2604,6 +2878,11 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
                  }
              }
          }
+        if(!readable){
+            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
+            s->dsp.put_pixels_tab[1][0](s->dest[1], dest_cb, uvlinesize, 8);
+            s->dsp.put_pixels_tab[1][0](s->dest[2], dest_cr, uvlinesize, 8);
+        }
      }
  }
  
@@ -2830,20 +3109,24 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
  
          s->lambda= s->lambda_table[mb_xy];
          update_qscale(s);
-        s->dquant= s->qscale - last_qp;
+    
+        if(!(s->flags&CODEC_FLAG_QP_RD)){
+            s->dquant= s->qscale - last_qp;
  
-        if(s->out_format==FMT_H263)
-            s->dquant= clip(s->dquant, -2, 2); //FIXME RD
+            if(s->out_format==FMT_H263)
+                s->dquant= clip(s->dquant, -2, 2); //FIXME RD
              
-        if(s->codec_id==CODEC_ID_MPEG4){        
-            if(!s->mb_intra){
-                if((s->mv_dir&MV_DIRECT) || s->mv_type==MV_TYPE_8X8)
-                    s->dquant=0;
+            if(s->codec_id==CODEC_ID_MPEG4){        
+                if(!s->mb_intra){
+                    if((s->mv_dir&MV_DIRECT) || s->mv_type==MV_TYPE_8X8)
+                        s->dquant=0;
+                }
              }
          }
          s->qscale= last_qp + s->dquant;
+        s->chroma_qscale= s->chroma_qscale_table[ s->qscale ];
          s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
-        s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
+        s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
      }
  
      if (s->mb_intra) {
@@ -3142,7 +3425,7 @@ void ff_mpeg_flush(AVCodecContext *avctx){
                                      || s->picture[i].type == FF_BUFFER_TYPE_USER))
          avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
      }
-    s->last_picture_ptr = s->next_picture_ptr = NULL;
+    s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
      
      s->parse_context.state= -1;
      s->parse_context.frame_start_found= 0;
@@ -3188,6 +3471,7 @@ static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext
  
      d->mb_skiped= 0;
      d->qscale= s->qscale;
+    d->dquant= s->dquant;
  }
  
  static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
@@ -3493,8 +3777,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
  
              s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
          }
-        convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, 
-                       s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8);
+        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
+                       s->intra_matrix, s->intra_quant_bias, 8, 8);
      }
      
      //FIXME var duplication
@@ -3517,7 +3801,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
              msmpeg4_encode_picture_header(s, picture_number);
          else if (s->h263_pred)
              mpeg4_encode_picture_header(s, picture_number);
-        else if (s->h263_rv10) 
+        else if (s->codec_id == CODEC_ID_RV10) 
              rv10_encode_picture_header(s, picture_number);
          else if (s->codec_id == CODEC_ID_FLV1)
              ff_flv_encode_picture_header(s, picture_number);
@@ -3580,8 +3864,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
          s->mb_x=0;
          s->mb_y= mb_y;
  
+        s->chroma_qscale= s->chroma_qscale_table[ s->qscale ];
          s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
-        s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
+        s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
          ff_init_block_index(s);
          
          for(mb_x=0; mb_x < s->mb_width; mb_x++) {
@@ -3656,8 +3941,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
              }
  
              s->mb_skiped=0;
+            s->dquant=0; //only for QP_RD
  
-            if(mb_type & (mb_type-1)){ // more than 1 MB type possible
+            if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible
                  int next_block=0;
                  int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
  
@@ -3755,6 +4041,60 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                              ff_clean_intra_table_entries(s); //old mode?
                      }
                  }
+
+                if(s->flags & CODEC_FLAG_QP_RD){
+                    if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
+                        const int last_qp= backup_s.qscale;
+                        int dquant, dir, qp, dc[6];
+                        DCTELEM ac[6][16];
+                        const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
+                        
+                        assert(backup_s.dquant == 0);
+
+                        //FIXME intra
+                        s->mv_dir= best_s.mv_dir;
+                        s->mv_type = MV_TYPE_16X16;
+                        s->mb_intra= best_s.mb_intra;
+                        s->mv[0][0][0] = best_s.mv[0][0][0];
+                        s->mv[0][0][1] = best_s.mv[0][0][1];
+                        s->mv[1][0][0] = best_s.mv[1][0][0];
+                        s->mv[1][0][1] = best_s.mv[1][0][1];
+                        
+                        dir= s->pict_type == B_TYPE ? 2 : 1;
+                        if(last_qp + dir > s->avctx->qmax) dir= -dir;
+                        for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
+                            qp= last_qp + dquant;
+                            if(qp < s->avctx->qmin || qp > s->avctx->qmax)
+                                break;
+                            backup_s.dquant= dquant;
+                            if(s->mb_intra){
+                                for(i=0; i<6; i++){
+                                    dc[i]= s->dc_val[0][ s->block_index[i] ];
+                                    memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
+                                }
+                            }
+
+                            encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, 
+                                         &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
+                            if(best_s.qscale != qp){
+                                if(s->mb_intra){
+                                    for(i=0; i<6; i++){
+                                        s->dc_val[0][ s->block_index[i] ]= dc[i];
+                                        memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
+                                    }
+                                }
+                                if(dir > 0 && dquant==dir){
+                                    dquant= 0;
+                                    dir= -dir;
+                                }else
+                                    break;
+                            }
+                        }
+                        qp= best_s.qscale;
+                        s->current_picture.qscale_table[xy]= qp;
+                    }
+                }
+
                  copy_context_after_encode(s, &best_s, -1);
                  
                  pb_bits_count= get_bit_count(&s->pb);
@@ -3790,90 +4130,6 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                      MPV_decode_mb(s, s->block);
              } else {
                  int motion_x, motion_y;
-                int intra_score;
-                int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_stride];
-                
-              if(s->avctx->mb_decision==FF_MB_DECISION_SIMPLE && s->pict_type==P_TYPE){ //FIXME check if the mess is usefull at all
-                /* get luma score */
-                if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
-                    intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_stride]<<8) - 500; //FIXME dont scale it down so we dont have to fix it
-                }else{
-                    uint8_t *dest_y;
-
-                    int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_stride]; //FIXME
-                    mean*= 0x01010101;
-                    
-                    dest_y  = s->new_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
-                
-                    for(i=0; i<16; i++){
-                        *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean;
-                        *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
-                        *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
-                        *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
-                    }
-
-                    s->mb_intra=1;
-                    intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize);
-                                        
-/*                    printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8, 
-                        s->current_picture.mb_var[mb_x + mb_y*s->mb_stride],
-                        s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_stride]);*/
-                }
-                
-                /* get chroma score */
-                if(s->avctx->mb_cmp&FF_CMP_CHROMA){
-                    int i;
-                    
-                    s->mb_intra=1;
-                    for(i=1; i<3; i++){
-                        uint8_t *dest_c;
-                        int mean;
-                        
-                        if(s->out_format == FMT_H263){
-                            mean= (s->dc_val[i][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;)
-                        }else{
-                            mean= (s->last_dc[i] + 4)>>3;
-                        }
-                        dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
-                        
-                        mean*= 0x01010101;
-                        for(i=0; i<8; i++){
-                            *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean;
-                            *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean;
-                        }
-                        
-                        intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize);
-                    }                
-                }
-
-                /* bias */
-                switch(s->avctx->mb_cmp&0xFF){
-                default:
-                case FF_CMP_SAD:
-                    intra_score+= 32*s->qscale;
-                    break;
-                case FF_CMP_SSE:
-                    intra_score+= 24*s->qscale*s->qscale;
-                    break;
-                case FF_CMP_SATD:
-                    intra_score+= 96*s->qscale;
-                    break;
-                case FF_CMP_DCT:
-                    intra_score+= 48*s->qscale;
-                    break;
-                case FF_CMP_BIT:
-                    intra_score+= 16;
-                    break;
-                case FF_CMP_PSNR:
-                case FF_CMP_RD:
-                    intra_score+= (s->qscale*s->qscale*109*8 + 64)>>7;
-                    break;
-                }
-
-                if(intra_score < inter_score)
-                    mb_type= MB_TYPE_INTRA;
-              }  
-                
                  s->mv_type=MV_TYPE_16X16;
                  // only one MB-Type possible
                  
@@ -3934,7 +4190,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                      break;
                  default:
                      motion_x=motion_y=0; //gcc warning fix
-                    printf("illegal MB type\n");
+                    av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
                  }
  
                  encode_mb(s, motion_x, motion_y);
@@ -3973,6 +4229,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                      s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                      s->dest[2], w>>1, h>>1, s->uvlinesize);
              }
+            if(s->loop_filter)
+                ff_h263_loop_filter(s);
  //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, get_bit_count(&s->pb));
          }
      }
@@ -4004,6 +4262,30 @@ static void encode_picture(MpegEncContext *s, int picture_number)
      }
  }
  
+void ff_denoise_dct(MpegEncContext *s, DCTELEM *block){
+    const int intra= s->mb_intra;
+    int i;
+
+    s->dct_count[intra]++;
+
+    for(i=0; i<64; i++){
+        int level= block[i];
+
+        if(level){
+            if(level>0){
+                s->dct_error_sum[intra][i] += level;
+                level -= s->dct_offset[intra][i];
+                if(level<0) level=0;
+            }else{
+                s->dct_error_sum[intra][i] -= level;
+                level += s->dct_offset[intra][i];
+                if(level>0) level=0;
+            }
+            block[i]= level;
+        }
+    }
+}
+
  static int dct_quantize_trellis_c(MpegEncContext *s, 
                          DCTELEM *block, int n,
                          int qscale, int *overflow){
@@ -4032,7 +4314,10 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
      const int patch_table= s->out_format == FMT_MPEG1 && !s->mb_intra;
          
      s->dsp.fdct (block);
-
+    
+    if(s->dct_error_sum)
+        ff_denoise_dct(s, block);
+    
      qmul= qscale*16;
      qadd= ((qscale-1)|1)*8;
  
@@ -4324,6 +4609,9 @@ static int dct_quantize_c(MpegEncContext *s,
  
      s->dsp.fdct (block);
  
+    if(s->dct_error_sum)
+        ff_denoise_dct(s, block);
+
      if (s->mb_intra) {
          if (!s->h263_aic) {
              if (n < 4)
@@ -4758,4 +5046,3 @@ AVCodec mjpeg_encoder = {
  };
  
  #endif //CONFIG_ENCODERS
-