]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/mpegvideo.c
fix a warning
[ffmpeg] / libavcodec / mpegvideo.c
index 31497852bb279fa6492dfb2a98476dbe7dc10f53..cbdfb1475f9db8970d7b59b25c6a301b35c59f1b 100644 (file)
 #include <limits.h>
 
 #ifdef USE_FASTMEMCPY
-#include "fastmemcpy.h"
+#include "libvo/fastmemcpy.h"
 #endif
 
 //#undef NDEBUG
 //#include <assert.h>
 
 #ifdef CONFIG_ENCODERS
-static void encode_picture(MpegEncContext *s, int picture_number);
+static int encode_picture(MpegEncContext *s, int picture_number);
 #endif //CONFIG_ENCODERS
 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale);
@@ -47,6 +47,8 @@ static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale);
 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale);
+static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale);
 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale);
 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
@@ -215,7 +217,7 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s
 }
 
 #ifdef CONFIG_ENCODERS
-void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
+void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
     int i;
 
     if(matrix){
@@ -228,6 +230,36 @@ void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
 }
 #endif //CONFIG_ENCODERS
 
+const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
+    int i;
+
+    assert(p<=end);
+    if(p>=end)
+        return end;
+
+    for(i=0; i<3; i++){
+        uint32_t tmp= *state << 8;
+        *state= tmp + *(p++);
+        if(tmp == 0x100 || p==end)
+            return p;
+    }
+
+    while(p<end){
+        if     (p[-1] > 1      ) p+= 3;
+        else if(p[-2]          ) p+= 2;
+        else if(p[-3]|(p[-1]-1)) p++;
+        else{
+            p++;
+            break;
+        }
+    }
+
+    p= FFMIN(p, end)-4;
+    *state=  be2me_32(unaligned32(p));
+
+    return p+4;
+}
+
 /* init common dct for both encoder and decoder */
 int DCT_common_init(MpegEncContext *s)
 {
@@ -236,6 +268,8 @@ int DCT_common_init(MpegEncContext *s)
     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
+    if(s->flags & CODEC_FLAG_BITEXACT)
+        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
 
 #ifdef CONFIG_ENCODERS
@@ -292,6 +326,7 @@ static void copy_picture(Picture *dst, Picture *src){
     dst->type= FF_BUFFER_TYPE_COPY;
 }
 
+#ifdef CONFIG_ENCODERS
 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
     int i;
 
@@ -330,6 +365,7 @@ static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *sr
         }
     }
 }
+#endif
 
 /**
  * allocates a Picture
@@ -453,8 +489,8 @@ static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
     int i;
 
     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
-    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
-    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
+    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
+    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
 
      //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
@@ -531,6 +567,7 @@ void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
 }
 
+#ifdef CONFIG_ENCODERS
 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
 #define COPY(a) dst->a= src->a
     COPY(pict_type);
@@ -547,6 +584,7 @@ static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContex
     COPY(partitioned_frame); //FIXME don't set in encode_header
 #undef COPY
 }
+#endif
 
 /**
  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
@@ -893,27 +931,44 @@ int MPV_encode_init(AVCodecContext *avctx)
 
     MPV_encode_defaults(s);
 
-    if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
-        av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
-        return -1;
-    }
-
-    if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
-        if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){
+    switch (avctx->codec_id) {
+    case CODEC_ID_MPEG2VIDEO:
+        if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
+            av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
+            return -1;
+        }
+        break;
+    case CODEC_ID_LJPEG:
+    case CODEC_ID_MJPEG:
+        if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
+           ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
             return -1;
         }
-    }else{
-        if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){
-            av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
+        break;
+    default:
+        if(avctx->pix_fmt != PIX_FMT_YUV420P){
+            av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
             return -1;
         }
     }
 
+    switch (avctx->pix_fmt) {
+    case PIX_FMT_YUVJ422P:
+    case PIX_FMT_YUV422P:
+        s->chroma_format = CHROMA_422;
+        break;
+    case PIX_FMT_YUVJ420P:
+    case PIX_FMT_YUV420P:
+    default:
+        s->chroma_format = CHROMA_420;
+        break;
+    }
+
     s->bit_rate = avctx->bit_rate;
     s->width = avctx->width;
     s->height = avctx->height;
-    if(avctx->gop_size > 600){
+    if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
         avctx->gop_size=600;
     }
@@ -957,6 +1012,7 @@ int MPV_encode_init(AVCodecContext *avctx)
     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
+    s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
 
     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
@@ -1041,6 +1097,11 @@ int MPV_encode_init(AVCodecContext *avctx)
         return -1;
     }
 
+    if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
+        av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
+        return -1;
+    }
+
     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
@@ -1067,8 +1128,8 @@ int MPV_encode_init(AVCodecContext *avctx)
     }
 
     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
-        av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass\n");
-        return -1;
+        av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
+        avctx->b_frame_strategy = 0;
     }
 
     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
@@ -1122,12 +1183,12 @@ int MPV_encode_init(AVCodecContext *avctx)
         s->intra_only = 1; /* force intra only for jpeg */
         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
-        s->mjpeg_vsample[0] = 1<<chroma_v_shift;
-        s->mjpeg_vsample[1] = 1;
-        s->mjpeg_vsample[2] = 1;
-        s->mjpeg_hsample[0] = 1<<chroma_h_shift;
-        s->mjpeg_hsample[1] = 1;
-        s->mjpeg_hsample[2] = 1;
+        s->mjpeg_vsample[0] = 2;
+        s->mjpeg_vsample[1] = 2>>chroma_v_shift;
+        s->mjpeg_vsample[2] = 2>>chroma_v_shift;
+        s->mjpeg_hsample[0] = 2;
+        s->mjpeg_hsample[1] = 2>>chroma_h_shift;
+        s->mjpeg_hsample[2] = 2>>chroma_h_shift;
         if (mjpeg_init(s) < 0)
             return -1;
         avctx->delay=0;
@@ -1315,10 +1376,6 @@ int MPV_encode_end(AVCodecContext *avctx)
 {
     MpegEncContext *s = avctx->priv_data;
 
-#ifdef STATS
-    print_stats();
-#endif
-
     ff_rate_control_uninit(s);
 
     MPV_common_end(s);
@@ -1635,7 +1692,7 @@ void MPV_frame_end(MpegEncContext *s)
  * @param color color of the arrow
  */
 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
-    int t, x, y, fr, f;
+    int x, y, fr, f;
 
     sx= clip(sx, 0, w-1);
     sy= clip(sy, 0, h-1);
@@ -1646,8 +1703,8 @@ static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h
 
     if(ABS(ex - sx) > ABS(ey - sy)){
         if(sx > ex){
-            t=sx; sx=ex; ex=t;
-            t=sy; sy=ey; ey=t;
+            SWAP(int, sx, ex);
+            SWAP(int, sy, ey);
         }
         buf+= sx + sy*stride;
         ex-= sx;
@@ -1660,8 +1717,8 @@ static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h
         }
     }else{
         if(sy > ey){
-            t=sx; sx=ex; ex=t;
-            t=sy; sy=ey; ey=t;
+            SWAP(int, sx, ex);
+            SWAP(int, sy, ey);
         }
         buf+= sx + sy*stride;
         ey-= sy;
@@ -1801,7 +1858,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
         const int width = s->avctx->width;
         const int height= s->avctx->height;
         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
-        const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
+        const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
         s->low_delay=0; //needed to see the vectors without trashing the buffers
 
         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
@@ -2082,7 +2139,10 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
                 int w= s->width >>h_shift;
                 int h= s->height>>v_shift;
                 uint8_t *src= pic_arg->data[i];
-                uint8_t *dst= pic->data[i] + INPLACE_OFFSET;
+                uint8_t *dst= pic->data[i];
+
+                if(!s->avctx->rc_buffer_size)
+                    dst +=INPLACE_OFFSET;
 
                 if(src_stride==dst_stride)
                     memcpy(dst, src, src_stride*h);
@@ -2150,10 +2210,11 @@ static int estimate_best_b_count(MpegEncContext *s){
     int i, j, out_size, p_lambda, b_lambda, lambda2;
     int outbuf_size= s->width * s->height; //FIXME
     uint8_t *outbuf= av_malloc(outbuf_size);
-    ImgReSampleContext *resample;
     int64_t best_rd= INT64_MAX;
     int best_b_count= -1;
 
+    assert(scale>=0 && scale <=3);
+
 //    emms_c();
     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
@@ -2175,8 +2236,6 @@ static int estimate_best_b_count(MpegEncContext *s){
     if (avcodec_open(c, codec) < 0)
         return -1;
 
-    resample= img_resample_init(c->width, c->height, s->width, s->height); //FIXME use sws
-
     for(i=0; i<s->max_b_frames+2; i++){
         int ysize= c->width*c->height;
         int csize= (c->width/2)*(c->height/2);
@@ -2199,8 +2258,11 @@ static int estimate_best_b_count(MpegEncContext *s){
         input[i].linesize[1]=
         input[i].linesize[2]= c->width/2;
 
-        if(!i || s->input_picture[i-1])
-            img_resample(resample, &input[i], &pre_input);
+        if(!i || s->input_picture[i-1]){
+            s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
+            s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
+            s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
+        }
     }
 
     for(j=0; j<s->max_b_frames+1; j++){
@@ -2242,7 +2304,6 @@ static int estimate_best_b_count(MpegEncContext *s){
     av_freep(&outbuf);
     avcodec_close(c);
     av_freep(&c);
-    img_resample_close(resample);
 
     for(i=0; i<s->max_b_frames+2; i++){
         av_freep(&input[i].data[0]);
@@ -2318,7 +2379,7 @@ static void select_input_picture(MpegEncContext *s){
                     }
                 }
                 for(i=0; i<s->max_b_frames+1; i++){
-                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
+                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
                 }
 
                 b_frames= FFMAX(0, i-1);
@@ -2380,21 +2441,22 @@ no_output_pic:
 
         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
 
-        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
+        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size){
             // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
 
             int i= ff_find_unused_picture(s, 0);
             Picture *pic= &s->picture[i];
 
+            pic->reference              = s->reordered_input_picture[0]->reference;
+            alloc_picture(s, pic, 0);
+
             /* mark us unused / free shared pic */
+            if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_INTERNAL)
+                s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
             for(i=0; i<4; i++)
                 s->reordered_input_picture[0]->data[i]= NULL;
             s->reordered_input_picture[0]->type= 0;
 
-            pic->reference              = s->reordered_input_picture[0]->reference;
-
-            alloc_picture(s, pic, 0);
-
             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
 
             s->current_picture_ptr= pic;
@@ -2425,11 +2487,6 @@ int MPV_encode_picture(AVCodecContext *avctx,
     AVFrame *pic_arg = data;
     int i, stuffing_count;
 
-    if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
-        av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
-        return -1;
-    }
-
     for(i=0; i<avctx->thread_count; i++){
         int start_y= s->thread_context[i]->start_mb_y;
         int   end_y= s->thread_context[i]->  end_mb_y;
@@ -2453,8 +2510,9 @@ int MPV_encode_picture(AVCodecContext *avctx,
 //emms_c();
 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
         MPV_frame_start(s, avctx);
-
-        encode_picture(s, s->picture_number);
+vbv_retry:
+        if (encode_picture(s, s->picture_number) < 0)
+            return -1;
 
         avctx->real_pict_num  = s->picture_number;
         avctx->header_bits = s->header_bits;
@@ -2471,6 +2529,28 @@ int MPV_encode_picture(AVCodecContext *avctx,
         if (s->out_format == FMT_MJPEG)
             mjpeg_picture_trailer(s);
 
+        if(avctx->rc_buffer_size){
+            RateControlContext *rcc= &s->rc_context;
+            int max_size= rcc->buffer_index/3;
+
+            if(put_bits_count(&s->pb) > max_size && s->qscale < s->avctx->qmax){
+                s->next_lambda= s->lambda*(s->qscale+1) / s->qscale;
+                s->mb_skipped = 0;        //done in MPV_frame_start()
+                if(s->pict_type==P_TYPE){ //done in encode_picture() so we must undo it
+                    if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
+                        s->no_rounding ^= 1;
+                }
+//                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
+                for(i=0; i<avctx->thread_count; i++){
+                    PutBitContext *pb= &s->thread_context[i]->pb;
+                    init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
+                }
+                goto vbv_retry;
+            }
+
+            assert(s->avctx->rc_max_rate);
+        }
+
         if(s->flags&CODEC_FLAG_PASS1)
             ff_write_pass1_stats(s);
 
@@ -3328,6 +3408,18 @@ static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
 }
 
+static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
+    /* fetch pixels for estimated mv 4 macroblocks ahead
+     * optimized for 64byte cache lines */
+    const int shift = s->quarter_sample ? 2 : 1;
+    const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
+    const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
+    int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
+    s->dsp.prefetch(pix[0]+off, s->linesize, 4);
+    off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
+    s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
+}
+
 /**
  * motion compensation of a single macroblock
  * @param s context
@@ -3352,6 +3444,8 @@ static inline void MPV_motion(MpegEncContext *s,
     mb_x = s->mb_x;
     mb_y = s->mb_y;
 
+    prefetch_motion(s, ref_picture, dir);
+
     if(s->obmc && s->pict_type != B_TYPE){
         int16_t mv_cache[4][4][2];
         const int xy= s->mb_x + s->mb_y*s->mb_stride;
@@ -3877,17 +3971,16 @@ static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM bloc
                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                     }
                 }else{
+                    op_qpix= s->me.qpel_put;
                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
                         op_pix = s->dsp.put_pixels_tab;
-                        op_qpix= s->dsp.put_qpel_pixels_tab;
                     }else{
                         op_pix = s->dsp.put_no_rnd_pixels_tab;
-                        op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
                     }
                     if (s->mv_dir & MV_DIR_FORWARD) {
                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                         op_pix = s->dsp.avg_pixels_tab;
-                        op_qpix= s->dsp.avg_qpel_pixels_tab;
+                        op_qpix= s->me.qpel_avg;
                     }
                     if (s->mv_dir & MV_DIR_BACKWARD) {
                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
@@ -3913,8 +4006,17 @@ static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM bloc
                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
 
                 if(!(s->flags&CODEC_FLAG_GRAY)){
-                    add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
-                    add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
+                    if (s->chroma_y_shift){
+                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
+                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
+                    }else{
+                        dct_linesize >>= 1;
+                        dct_offset >>=1;
+                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
+                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
+                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
+                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
+                    }
                 }
             } else if(s->codec_id != CODEC_ID_WMV2){
                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
@@ -3956,8 +4058,17 @@ static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM bloc
                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
 
                 if(!(s->flags&CODEC_FLAG_GRAY)){
-                    put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
-                    put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
+                    if(s->chroma_y_shift){
+                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
+                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
+                    }else{
+                        dct_offset >>=1;
+                        dct_linesize >>=1;
+                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
+                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
+                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
+                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
+                    }
                 }
             }else{
                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
@@ -4179,19 +4290,19 @@ static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
     }
 }
 
-static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
+static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
 {
-    int16_t weight[6][64];
-    DCTELEM orig[6][64];
+    int16_t weight[8][64];
+    DCTELEM orig[8][64];
     const int mb_x= s->mb_x;
     const int mb_y= s->mb_y;
     int i;
-    int skip_dct[6];
+    int skip_dct[8];
     int dct_offset   = s->linesize*8; //default for progressive frames
     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
     int wrap_y, wrap_c;
 
-    for(i=0; i<6; i++) skip_dct[i]=0;
+    for(i=0; i<mb_block_count; i++) skip_dct[i]=0;
 
     if(s->adaptive_quant){
         const int last_qp= s->qscale;
@@ -4227,16 +4338,16 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
     wrap_y = s->linesize;
     wrap_c = s->uvlinesize;
     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
-    ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
-    ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
+    ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
+    ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
 
     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
         uint8_t *ebuf= s->edge_emu_buffer + 32;
         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
         ptr_y= ebuf;
-        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
         ptr_cb= ebuf+18*wrap_y;
-        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
         ptr_cr= ebuf+18*wrap_y+8;
     }
 
@@ -4256,6 +4367,8 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
 
                     dct_offset= wrap_y;
                     wrap_y<<=1;
+                    if (s->chroma_format == CHROMA_422)
+                        wrap_c<<=1;
                 }
             }
         }
@@ -4271,6 +4384,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
         }else{
             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
+            if(!s->chroma_y_shift){ /* 422 */
+                s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
+                s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
+            }
         }
     }else{
         op_pixels_func (*op_pix)[4];
@@ -4316,6 +4433,8 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
 
                     dct_offset= wrap_y;
                     wrap_y<<=1;
+                    if (s->chroma_format == CHROMA_422)
+                        wrap_c<<=1;
                 }
             }
         }
@@ -4331,6 +4450,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
         }else{
             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
+            if(!s->chroma_y_shift){ /* 422 */
+                s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
+                s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
+            }
         }
         /* pre quantization */
         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
@@ -4341,6 +4464,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
+            if(!s->chroma_y_shift){ /* 422 */
+                if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
+                if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
+            }
         }
     }
 
@@ -4351,13 +4478,17 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
-        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
+        if(!s->chroma_y_shift){ /* 422 */
+            if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
+            if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
+        }
+        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
     }
 
     /* DCT & quantize */
     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
     {
-        for(i=0;i<6;i++) {
+        for(i=0;i<mb_block_count;i++) {
             if(!skip_dct[i]){
                 int overflow;
                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
@@ -4369,7 +4500,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
                 s->block_last_index[i]= -1;
         }
         if(s->avctx->quantizer_noise_shaping){
-            for(i=0;i<6;i++) {
+            for(i=0;i<mb_block_count;i++) {
                 if(!skip_dct[i]){
                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
                 }
@@ -4380,11 +4511,11 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
             for(i=0; i<4; i++)
                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
         if(s->chroma_elim_threshold && !s->mb_intra)
-            for(i=4; i<6; i++)
+            for(i=4; i<mb_block_count; i++)
                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
 
         if(s->flags & CODEC_FLAG_CBP_RD){
-            for(i=0;i<6;i++) {
+            for(i=0;i<mb_block_count;i++) {
                 if(s->block_last_index[i] == -1)
                     s->coded_score[i]= INT_MAX/256;
             }
@@ -4400,7 +4531,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
 
     //non c quantize code returns incorrect block_last_index FIXME
     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
-        for(i=0; i<6; i++){
+        for(i=0; i<mb_block_count; i++){
             int j;
             if(s->block_last_index[i]>0){
                 for(j=63; j>0; j--){
@@ -4441,6 +4572,12 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
     }
 }
 
+static always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
+{
+    if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
+    else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
+}
+
 #endif //CONFIG_ENCODERS
 
 void ff_mpeg_flush(AVCodecContext *avctx){
@@ -4550,7 +4687,7 @@ static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *
         d->tex_pb= s->tex_pb;
     }
     d->block= s->block;
-    for(i=0; i<6; i++)
+    for(i=0; i<8; i++)
         d->block_last_index[i]= s->block_last_index[i];
     d->interlaced_dct= s->interlaced_dct;
     d->qscale= s->qscale;
@@ -5357,10 +5494,17 @@ static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src)
     flush_put_bits(&dst->pb);
 }
 
-static void estimate_qp(MpegEncContext *s, int dry_run){
-    if (!s->fixed_qscale)
+static int estimate_qp(MpegEncContext *s, int dry_run){
+    if (s->next_lambda){
+        s->current_picture_ptr->quality=
+        s->current_picture.quality = s->next_lambda;
+        if(!dry_run) s->next_lambda= 0;
+    } else if (!s->fixed_qscale) {
         s->current_picture_ptr->quality=
         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
+        if (s->current_picture.quality < 0)
+            return -1;
+    }
 
     if(s->adaptive_quant){
         switch(s->codec_id){
@@ -5380,9 +5524,10 @@ static void estimate_qp(MpegEncContext *s, int dry_run){
         s->lambda= s->current_picture.quality;
 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
     update_qscale(s);
+    return 0;
 }
 
-static void encode_picture(MpegEncContext *s, int picture_number)
+static int encode_picture(MpegEncContext *s, int picture_number)
 {
     int i;
     int bits;
@@ -5411,7 +5556,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     }
 
     if(s->flags & CODEC_FLAG_PASS2){
-        estimate_qp(s, 1);
+        if (estimate_qp(s,1) < 0)
+            return -1;
         ff_get_2pass_fcode(s);
     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
         if(s->pict_type==B_TYPE)
@@ -5517,7 +5663,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
         }
     }
 
-    estimate_qp(s, 0);
+    if (estimate_qp(s, 0) < 0)
+        return -1;
 
     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
         s->qscale= 3; //reduce clipping problems
@@ -5528,7 +5675,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
         for(i=1;i<64;i++){
             int j= s->dsp.idct_permutation[i];
 
-            s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3) & 0xFF;
+            s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
         }
         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
@@ -5593,10 +5740,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
         merge_context_after_encode(s, s->thread_context[i]);
     }
     emms_c();
+    return 0;
 }
 
-#endif //CONFIG_ENCODERS
-
 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
     const int intra= s->mb_intra;
     int i;
@@ -5621,8 +5767,6 @@ static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
     }
 }
 
-#ifdef CONFIG_ENCODERS
-
 static int dct_quantize_trellis_c(MpegEncContext *s,
                         DCTELEM *block, int n,
                         int qscale, int *overflow){
@@ -5937,7 +6081,7 @@ static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
                         int n, int qscale){
     int16_t rem[64];
-    DCTELEM d1[64] __align16;
+    DECLARE_ALIGNED_16(DCTELEM, d1[64]);
     const int *qmat;
     const uint8_t *scantable= s->intra_scantable.scantable;
     const uint8_t *perm_scantable= s->intra_scantable.permutated;
@@ -6482,6 +6626,39 @@ static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
     }
 }
 
+static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale)
+{
+    int i, level, nCoeffs;
+    const uint16_t *quant_matrix;
+    int sum=-1;
+
+    if(s->alternate_scan) nCoeffs= 63;
+    else nCoeffs= s->block_last_index[n];
+
+    if (n < 4)
+        block[0] = block[0] * s->y_dc_scale;
+    else
+        block[0] = block[0] * s->c_dc_scale;
+    quant_matrix = s->intra_matrix;
+    for(i=1;i<=nCoeffs;i++) {
+        int j= s->intra_scantable.permutated[i];
+        level = block[j];
+        if (level) {
+            if (level < 0) {
+                level = -level;
+                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+                level = -level;
+            } else {
+                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+            }
+            block[j] = level;
+            sum+=level;
+        }
+    }
+    block[63]^=sum&1;
+}
+
 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale)
 {
@@ -6696,7 +6873,7 @@ AVCodec mjpeg_encoder = {
     MPV_encode_init,
     MPV_encode_picture,
     MPV_encode_end,
-    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, PIX_FMT_YUVJ422P, -1},
 };
 
 #endif //CONFIG_ENCODERS