1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  *
22  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
23  */
24
25 /**
26  * @file mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "mjpegenc.h"
34 #include "msmpeg4.h"
35 #include "faandct.h"
36 #include <limits.h>
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static int encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
53                                    DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
55                                   DCTELEM *block, int n, int qscale);
56 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
57                                   DCTELEM *block, int n, int qscale);
58 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
59 #ifdef CONFIG_ENCODERS
60 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
61 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
62 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
63 static int sse_mb(MpegEncContext *s);
64 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
65 #endif //CONFIG_ENCODERS
66
67 #ifdef HAVE_XVMC
68 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
69 extern void XVMC_field_end(MpegEncContext *s);
70 extern void XVMC_decode_mb(MpegEncContext *s);
71 #endif
72
73 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
74
75
76 /* enable all paranoid tests for rounding, overflows, etc... */
77 //#define PARANOID
78
79 //#define DEBUG
80
81
82 /* for jpeg fast DCT */
83 #define CONST_BITS 14
84
85 static const uint16_t aanscales[64] = {
86     /* precomputed values scaled up by 14 bits */
87     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
88     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
89     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
90     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
91     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
92     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
93     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
94     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
95 };
96
97 static const uint8_t h263_chroma_roundtab[16] = {
98 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
99     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
100 };
101
102 static const uint8_t ff_default_chroma_qscale_table[32]={
103 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
104     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
105 };
106
107 #ifdef CONFIG_ENCODERS
108 static uint8_t default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
109 static uint8_t default_fcode_tab[MAX_MV*2+1];
110
111 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
112
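/**
 * builds the per-qscale quantization tables.
 * qmat[] (and qmat16[] for the 16 bit code paths) hold fixed-point reciprocals of
 * qscale*quant_matrix[], so quantization becomes a multiply and shift instead of a
 * division. fdct_ifast (and faandct without postscale) leaves the AAN scale factors
 * in its output, so for those the factors are folded into the tables as well. The
 * final loop only checks how close the products come to overflowing 32 bits and
 * warns if QMAT_SHIFT turns out to be too large.
 */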
113 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
114                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
115 {
116     int qscale;
117     int shift=0;
118
119     for(qscale=qmin; qscale<=qmax; qscale++){
120         int i;
121         if (dsp->fdct == ff_jpeg_fdct_islow
122 #ifdef FAAN_POSTSCALE
123             || dsp->fdct == ff_faandct
124 #endif
125             ) {
126             for(i=0;i<64;i++) {
127                 const int j= dsp->idct_permutation[i];
128                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
129                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
130                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
131                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
132
133                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
134                                 (qscale * quant_matrix[j]));
135             }
136         } else if (dsp->fdct == fdct_ifast
137 #ifndef FAAN_POSTSCALE
138                    || dsp->fdct == ff_faandct
139 #endif
140                    ) {
141             for(i=0;i<64;i++) {
142                 const int j= dsp->idct_permutation[i];
143                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
144                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
145                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
146                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
147
148                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
149                                 (aanscales[i] * qscale * quant_matrix[j]));
150             }
151         } else {
152             for(i=0;i<64;i++) {
153                 const int j= dsp->idct_permutation[i];
154                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
155                    So 16           <= qscale * quant_matrix[i]             <= 7905
156                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
157                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
158                 */
159                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
160 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
161                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
162
163                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
164                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
165             }
166         }
167
168         for(i=intra; i<64; i++){
169             int64_t max= 8191;
170             if (dsp->fdct == fdct_ifast
171 #ifndef FAAN_POSTSCALE
172                    || dsp->fdct == ff_faandct
173 #endif
174                    ) {
175                 max= (8191LL*aanscales[i]) >> 14;
176             }
177             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
178                 shift++;
179             }
180         }
181     }
182     if(shift){
183         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
184     }
185 }
186
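/**
 * derives qscale from the current lambda.
 * 139/2^14 is roughly 1/FF_QP2LAMBDA, so this approximately inverts the qp->lambda
 * mapping; the result is clamped to the configured qmin..qmax range and lambda2
 * (lambda squared, rescaled by FF_LAMBDA_SCALE) is kept in sync for the RD code.
 */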
187 static inline void update_qscale(MpegEncContext *s){
188     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
189     s->qscale= av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
190
191     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
192 }
193 #endif //CONFIG_ENCODERS
194
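/**
 * initializes a ScanTable from a raw scan order.
 * permutated[] is the scan order run through the IDCT permutation, and
 * raster_end[i] is the highest permuted position reached within the first i+1
 * scan entries, i.e. how far the block extends in raster order when only the
 * first i+1 scan positions are coded.
 */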
195 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
196     int i;
197     int end;
198
199     st->scantable= src_scantable;
200
201     for(i=0; i<64; i++){
202         int j;
203         j = src_scantable[i];
204         st->permutated[i] = permutation[j];
205 #ifdef ARCH_POWERPC
206         st->inverse[j] = i;
207 #endif
208     }
209
210     end=-1;
211     for(i=0; i<64; i++){
212         int j;
213         j = st->permutated[i];
214         if(j>end) end=j;
215         st->raster_end[i]= end;
216     }
217 }
218
219 #ifdef CONFIG_ENCODERS
220 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
221     int i;
222
223     if(matrix){
224         put_bits(pb, 1, 1);
225         for(i=0;i<64;i++) {
226             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
227         }
228     }else
229         put_bits(pb, 1, 0);
230 }
231 #endif //CONFIG_ENCODERS
232
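/**
 * finds the next MPEG-style 00 00 01 xx start code.
 * *state carries the last bytes across calls, so start codes that straddle
 * buffer boundaries are still found. Returns a pointer just past the start
 * code, or end if none was found in the buffer.
 */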
233 const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
234     int i;
235
236     assert(p<=end);
237     if(p>=end)
238         return end;
239
240     for(i=0; i<3; i++){
241         uint32_t tmp= *state << 8;
242         *state= tmp + *(p++);
243         if(tmp == 0x100 || p==end)
244             return p;
245     }
246
247     while(p<end){
248         if     (p[-1] > 1      ) p+= 3;
249         else if(p[-2]          ) p+= 2;
250         else if(p[-3]|(p[-1]-1)) p++;
251         else{
252             p++;
253             break;
254         }
255     }
256
257     p= FFMIN(p, end)-4;
258     *state=  be2me_32(unaligned32(p));
259
260     return p+4;
261 }
262
263 /* init common dct for both encoder and decoder */
264 int DCT_common_init(MpegEncContext *s)
265 {
266     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
267     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
268     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
269     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
270     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
271     if(s->flags & CODEC_FLAG_BITEXACT)
272         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
273     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
274
275 #ifdef CONFIG_ENCODERS
276     s->dct_quantize= dct_quantize_c;
277     s->denoise_dct= denoise_dct_c;
278 #endif //CONFIG_ENCODERS
279
280 #ifdef HAVE_MMX
281     MPV_common_init_mmx(s);
282 #endif
283 #ifdef ARCH_ALPHA
284     MPV_common_init_axp(s);
285 #endif
286 #ifdef HAVE_MLIB
287     MPV_common_init_mlib(s);
288 #endif
289 #ifdef HAVE_MMI
290     MPV_common_init_mmi(s);
291 #endif
292 #ifdef ARCH_ARMV4L
293     MPV_common_init_armv4l(s);
294 #endif
295 #ifdef ARCH_POWERPC
296     MPV_common_init_ppc(s);
297 #endif
298 #ifdef ARCH_BFIN
299     MPV_common_init_bfin(s);
300 #endif
301
302 #ifdef CONFIG_ENCODERS
303     s->fast_dct_quantize= s->dct_quantize;
304
305     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
306         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
307     }
308
309 #endif //CONFIG_ENCODERS
310
311     /* load & permute scantables
312        note: only wmv uses different ones
313     */
314     if(s->alternate_scan){
315         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
316         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
317     }else{
318         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
319         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
320     }
321     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
322     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
323
324     return 0;
325 }
326
327 static void copy_picture(Picture *dst, Picture *src){
328     *dst = *src;
329     dst->type= FF_BUFFER_TYPE_COPY;
330 }
331
332 #ifdef CONFIG_ENCODERS
333 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
334     int i;
335
336     dst->pict_type              = src->pict_type;
337     dst->quality                = src->quality;
338     dst->coded_picture_number   = src->coded_picture_number;
339     dst->display_picture_number = src->display_picture_number;
340 //    dst->reference              = src->reference;
341     dst->pts                    = src->pts;
342     dst->interlaced_frame       = src->interlaced_frame;
343     dst->top_field_first        = src->top_field_first;
344
345     if(s->avctx->me_threshold){
346         if(!src->motion_val[0])
347             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
348         if(!src->mb_type)
349             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
350         if(!src->ref_index[0])
351             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
352         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
353             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
354             src->motion_subsample_log2, dst->motion_subsample_log2);
355
356         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
357
358         for(i=0; i<2; i++){
359             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
360             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
361
362             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
363                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
364             }
365             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
366                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
367             }
368         }
369     }
370 }
371 #endif
372
373 /**
374  * allocates a Picture
375  * The pixels are allocated/set by calling get_buffer() if shared=0
376  */
377 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
378     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) does not sig11
379     const int mb_array_size= s->mb_stride*s->mb_height;
380     const int b8_array_size= s->b8_stride*s->mb_height*2;
381     const int b4_array_size= s->b4_stride*s->mb_height*4;
382     int i;
383
384     if(shared){
385         assert(pic->data[0]);
386         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
387         pic->type= FF_BUFFER_TYPE_SHARED;
388     }else{
389         int r;
390
391         assert(!pic->data[0]);
392
393         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
394
395         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
396             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
397             return -1;
398         }
399
400         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
401             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
402             return -1;
403         }
404
405         if(pic->linesize[1] != pic->linesize[2]){
406             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
407             return -1;
408         }
409
410         s->linesize  = pic->linesize[0];
411         s->uvlinesize= pic->linesize[1];
412     }
413
414     if(pic->qscale_table==NULL){
415         if (s->encoding) {
416             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
417             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
418             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
419         }
420
421         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
422         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
423         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
424         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
425         if(s->out_format == FMT_H264){
426             for(i=0; i<2; i++){
427                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
428                 pic->motion_val[i]= pic->motion_val_base[i]+4;
429                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
430             }
431             pic->motion_subsample_log2= 2;
432         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
433             for(i=0; i<2; i++){
434                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
435                 pic->motion_val[i]= pic->motion_val_base[i]+4;
436                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
437             }
438             pic->motion_subsample_log2= 3;
439         }
440         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
441             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
442         }
443         pic->qstride= s->mb_stride;
444         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
445     }
446
447     /* It might be nicer if the application would keep track of these
448      * but it would require an API change. */
449     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
450     s->prev_pict_types[0]= s->pict_type;
451     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
452         pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.
453
454     return 0;
455 fail: //for the CHECKED_ALLOCZ macro
456     return -1;
457 }
458
459 /**
460  * deallocates a picture
461  */
462 static void free_picture(MpegEncContext *s, Picture *pic){
463     int i;
464
465     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
466         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
467     }
468
469     av_freep(&pic->mb_var);
470     av_freep(&pic->mc_mb_var);
471     av_freep(&pic->mb_mean);
472     av_freep(&pic->mbskip_table);
473     av_freep(&pic->qscale_table);
474     av_freep(&pic->mb_type_base);
475     av_freep(&pic->dct_coeff);
476     av_freep(&pic->pan_scan);
477     pic->mb_type= NULL;
478     for(i=0; i<2; i++){
479         av_freep(&pic->motion_val_base[i]);
480         av_freep(&pic->ref_index[i]);
481     }
482
483     if(pic->type == FF_BUFFER_TYPE_SHARED){
484         for(i=0; i<4; i++){
485             pic->base[i]=
486             pic->data[i]= NULL;
487         }
488         pic->type= 0;
489     }
490 }
491
492 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
493     int i;
494
495     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
496     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
497     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
498
499      //FIXME should be linesize instead of s->width*2 but that is not known before get_buffer()
500     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
501     s->rd_scratchpad=   s->me.scratchpad;
502     s->b_scratchpad=    s->me.scratchpad;
503     s->obmc_scratchpad= s->me.scratchpad + 16;
504     if (s->encoding) {
505         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
506         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
507         if(s->avctx->noise_reduction){
508             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
509         }
510     }
511     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
512     s->block= s->blocks[0];
513
514     for(i=0;i<12;i++){
515         s->pblocks[i] = (short *)(&s->block[i]);
516     }
517     return 0;
518 fail:
519     return -1; //free() through MPV_common_end()
520 }
521
522 static void free_duplicate_context(MpegEncContext *s){
523     if(s==NULL) return;
524
525     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
526     av_freep(&s->me.scratchpad);
527     s->rd_scratchpad=
528     s->b_scratchpad=
529     s->obmc_scratchpad= NULL;
530
531     av_freep(&s->dct_error_sum);
532     av_freep(&s->me.map);
533     av_freep(&s->me.score_map);
534     av_freep(&s->blocks);
535     s->block= NULL;
536 }
537
538 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
539 #define COPY(a) bak->a= src->a
540     COPY(allocated_edge_emu_buffer);
541     COPY(edge_emu_buffer);
542     COPY(me.scratchpad);
543     COPY(rd_scratchpad);
544     COPY(b_scratchpad);
545     COPY(obmc_scratchpad);
546     COPY(me.map);
547     COPY(me.score_map);
548     COPY(blocks);
549     COPY(block);
550     COPY(start_mb_y);
551     COPY(end_mb_y);
552     COPY(me.map_generation);
553     COPY(pb);
554     COPY(dct_error_sum);
555     COPY(dct_count[0]);
556     COPY(dct_count[1]);
557 #undef COPY
558 }
559
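/**
 * refreshes a (slice thread) context from another one.
 * src is copied into dst wholesale, but the buffers, PutBitContext, slice range
 * and ME map state saved by backup_duplicate_context() are restored afterwards,
 * and the pblocks[] pointers are rebased onto dst's own block storage.
 */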
560 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
561     MpegEncContext bak;
562     int i;
563     //FIXME copy only needed parts
564 //START_TIMER
565     backup_duplicate_context(&bak, dst);
566     memcpy(dst, src, sizeof(MpegEncContext));
567     backup_duplicate_context(dst, &bak);
568     for(i=0;i<12;i++){
569         dst->pblocks[i] = (short *)(&dst->block[i]);
570     }
571 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
572 }
573
574 #ifdef CONFIG_ENCODERS
575 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
576 #define COPY(a) dst->a= src->a
577     COPY(pict_type);
578     COPY(current_picture);
579     COPY(f_code);
580     COPY(b_code);
581     COPY(qscale);
582     COPY(lambda);
583     COPY(lambda2);
584     COPY(picture_in_gop_number);
585     COPY(gop_picture_number);
586     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
587     COPY(progressive_frame); //FIXME don't set in encode_header
588     COPY(partitioned_frame); //FIXME don't set in encode_header
589 #undef COPY
590 }
591 #endif
592
593 /**
594  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
595  * the changed fields will not depend upon the prior state of the MpegEncContext.
596  */
597 static void MPV_common_defaults(MpegEncContext *s){
598     s->y_dc_scale_table=
599     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
600     s->chroma_qscale_table= ff_default_chroma_qscale_table;
601     s->progressive_frame= 1;
602     s->progressive_sequence= 1;
603     s->picture_structure= PICT_FRAME;
604
605     s->coded_picture_number = 0;
606     s->picture_number = 0;
607     s->input_picture_number = 0;
608
609     s->picture_in_gop_number = 0;
610
611     s->f_code = 1;
612     s->b_code = 1;
613 }
614
615 /**
616  * sets the given MpegEncContext to defaults for decoding.
617  * the changed fields will not depend upon the prior state of the MpegEncContext.
618  */
619 void MPV_decode_defaults(MpegEncContext *s){
620     MPV_common_defaults(s);
621 }
622
623 /**
624  * sets the given MpegEncContext to defaults for encoding.
625  * the changed fields will not depend upon the prior state of the MpegEncContext.
626  */
627
628 #ifdef CONFIG_ENCODERS
629 static void MPV_encode_defaults(MpegEncContext *s){
630     int i;
631     MPV_common_defaults(s);
632
633     for(i=-16; i<16; i++){
634         default_fcode_tab[i + MAX_MV]= 1;
635     }
636     s->me.mv_penalty= default_mv_penalty;
637     s->fcode_tab= default_fcode_tab;
638 }
639 #endif //CONFIG_ENCODERS
640
641 /**
642  * init common structure for both encoder and decoder.
643  * this assumes that some variables like width/height are already set
644  */
645 int MPV_common_init(MpegEncContext *s)
646 {
647     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
648
649     s->mb_height = (s->height + 15) / 16;
650
651     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
652         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
653         return -1;
654     }
655
656     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
657         return -1;
658
659     dsputil_init(&s->dsp, s->avctx);
660     DCT_common_init(s);
661
662     s->flags= s->avctx->flags;
663     s->flags2= s->avctx->flags2;
664
665     s->mb_width  = (s->width  + 15) / 16;
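    /* The strides below are one unit wider than the picture; the extra padding
       column means neighbour lookups at the row edges do not land on a real
       (macro)block of the adjacent row. */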
666     s->mb_stride = s->mb_width + 1;
667     s->b8_stride = s->mb_width*2 + 1;
668     s->b4_stride = s->mb_width*4 + 1;
669     mb_array_size= s->mb_height * s->mb_stride;
670     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
671
672     /* set chroma shifts */
673     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
674                                                     &(s->chroma_y_shift) );
675
676     /* set default edge pos, will be overridden in decode_header if needed */
677     s->h_edge_pos= s->mb_width*16;
678     s->v_edge_pos= s->mb_height*16;
679
680     s->mb_num = s->mb_width * s->mb_height;
681
682     s->block_wrap[0]=
683     s->block_wrap[1]=
684     s->block_wrap[2]=
685     s->block_wrap[3]= s->b8_stride;
686     s->block_wrap[4]=
687     s->block_wrap[5]= s->mb_stride;
688
689     y_size = s->b8_stride * (2 * s->mb_height + 1);
690     c_size = s->mb_stride * (s->mb_height + 1);
691     yc_size = y_size + 2 * c_size;
692
693     /* convert fourcc to upper case */
694     s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
695                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
696                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
697                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
698
699     s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
700                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
701                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
702                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
703
704     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
705
706     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
707     for(y=0; y<s->mb_height; y++){
708         for(x=0; x<s->mb_width; x++){
709             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
710         }
711     }
712     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
713
714     if (s->encoding) {
715         /* Allocate MV tables */
716         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
717         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
718         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
719         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
720         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
721         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
722         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
723         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
724         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
725         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
726         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
727         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
728
729         if(s->msmpeg4_version){
730             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
731         }
732         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
733
734         /* Allocate MB type table */
735         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
736
737         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
738
739         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
740         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
741         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
742         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
743         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
744         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
745
746         if(s->avctx->noise_reduction){
747             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
748         }
749     }
750     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
751
752     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
753
754     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
755         /* interlaced direct mode decoding tables */
756             for(i=0; i<2; i++){
757                 int j, k;
758                 for(j=0; j<2; j++){
759                     for(k=0; k<2; k++){
760                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
761                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
762                     }
763                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
764                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
765                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
766                 }
767                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
768             }
769     }
770     if (s->out_format == FMT_H263) {
771         /* ac values */
772         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
773         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
774         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
775         s->ac_val[2] = s->ac_val[1] + c_size;
776
777         /* cbp values */
778         CHECKED_ALLOCZ(s->coded_block_base, y_size);
779         s->coded_block= s->coded_block_base + s->b8_stride + 1;
780
781         /* cbp, ac_pred, pred_dir */
782         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
783         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
784     }
785
786     if (s->h263_pred || s->h263_plus || !s->encoding) {
787         /* dc values */
788         //MN: we need these for error resilience of intra-frames
789         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
790         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
791         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
792         s->dc_val[2] = s->dc_val[1] + c_size;
793         for(i=0;i<yc_size;i++)
794             s->dc_val_base[i] = 1024;
795     }
796
797     /* which mb is an intra block */
798     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
799     memset(s->mbintra_table, 1, mb_array_size);
800
801     /* init macroblock skip table */
802     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
803     //Note the +2 is for a quicker mpeg4 slice_end detection
804     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
805
806     s->parse_context.state= -1;
807     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
808        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
809        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
810        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
811     }
812
813     s->context_initialized = 1;
814
815     s->thread_context[0]= s;
816     for(i=1; i<s->avctx->thread_count; i++){
817         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
818         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
819     }
820
821     for(i=0; i<s->avctx->thread_count; i++){
822         if(init_duplicate_context(s->thread_context[i], s) < 0)
823            goto fail;
824         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
825         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
826     }
827
828     return 0;
829  fail:
830     MPV_common_end(s);
831     return -1;
832 }
833
834 /* init common structure for both encoder and decoder */
835 void MPV_common_end(MpegEncContext *s)
836 {
837     int i, j, k;
838
839     for(i=0; i<s->avctx->thread_count; i++){
840         free_duplicate_context(s->thread_context[i]);
841     }
842     for(i=1; i<s->avctx->thread_count; i++){
843         av_freep(&s->thread_context[i]);
844     }
845
846     av_freep(&s->parse_context.buffer);
847     s->parse_context.buffer_size=0;
848
849     av_freep(&s->mb_type);
850     av_freep(&s->p_mv_table_base);
851     av_freep(&s->b_forw_mv_table_base);
852     av_freep(&s->b_back_mv_table_base);
853     av_freep(&s->b_bidir_forw_mv_table_base);
854     av_freep(&s->b_bidir_back_mv_table_base);
855     av_freep(&s->b_direct_mv_table_base);
856     s->p_mv_table= NULL;
857     s->b_forw_mv_table= NULL;
858     s->b_back_mv_table= NULL;
859     s->b_bidir_forw_mv_table= NULL;
860     s->b_bidir_back_mv_table= NULL;
861     s->b_direct_mv_table= NULL;
862     for(i=0; i<2; i++){
863         for(j=0; j<2; j++){
864             for(k=0; k<2; k++){
865                 av_freep(&s->b_field_mv_table_base[i][j][k]);
866                 s->b_field_mv_table[i][j][k]=NULL;
867             }
868             av_freep(&s->b_field_select_table[i][j]);
869             av_freep(&s->p_field_mv_table_base[i][j]);
870             s->p_field_mv_table[i][j]=NULL;
871         }
872         av_freep(&s->p_field_select_table[i]);
873     }
874
875     av_freep(&s->dc_val_base);
876     av_freep(&s->ac_val_base);
877     av_freep(&s->coded_block_base);
878     av_freep(&s->mbintra_table);
879     av_freep(&s->cbp_table);
880     av_freep(&s->pred_dir_table);
881
882     av_freep(&s->mbskip_table);
883     av_freep(&s->prev_pict_types);
884     av_freep(&s->bitstream_buffer);
885     s->allocated_bitstream_buffer_size=0;
886
887     av_freep(&s->avctx->stats_out);
888     av_freep(&s->ac_stats);
889     av_freep(&s->error_status_table);
890     av_freep(&s->mb_index2xy);
891     av_freep(&s->lambda_table);
892     av_freep(&s->q_intra_matrix);
893     av_freep(&s->q_inter_matrix);
894     av_freep(&s->q_intra_matrix16);
895     av_freep(&s->q_inter_matrix16);
896     av_freep(&s->input_picture);
897     av_freep(&s->reordered_input_picture);
898     av_freep(&s->dct_offset);
899
900     if(s->picture){
901         for(i=0; i<MAX_PICTURE_COUNT; i++){
902             free_picture(s, &s->picture[i]);
903         }
904     }
905     av_freep(&s->picture);
906     s->context_initialized = 0;
907     s->last_picture_ptr=
908     s->next_picture_ptr=
909     s->current_picture_ptr= NULL;
910     s->linesize= s->uvlinesize= 0;
911
912     for(i=0; i<3; i++)
913         av_freep(&s->visualization_buffer[i]);
914
915     avcodec_default_free_buffers(s->avctx);
916 }
917
918 #ifdef CONFIG_ENCODERS
919
920 /* init video encoder */
921 int MPV_encode_init(AVCodecContext *avctx)
922 {
923     MpegEncContext *s = avctx->priv_data;
924     int i;
925     int chroma_h_shift, chroma_v_shift;
926
927     MPV_encode_defaults(s);
928
929     switch (avctx->codec_id) {
930     case CODEC_ID_MPEG2VIDEO:
931         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
932             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
933             return -1;
934         }
935         break;
936     case CODEC_ID_LJPEG:
937     case CODEC_ID_MJPEG:
938         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
939            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
940             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
941             return -1;
942         }
943         break;
944     default:
945         if(avctx->pix_fmt != PIX_FMT_YUV420P){
946             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
947             return -1;
948         }
949     }
950
951     switch (avctx->pix_fmt) {
952     case PIX_FMT_YUVJ422P:
953     case PIX_FMT_YUV422P:
954         s->chroma_format = CHROMA_422;
955         break;
956     case PIX_FMT_YUVJ420P:
957     case PIX_FMT_YUV420P:
958     default:
959         s->chroma_format = CHROMA_420;
960         break;
961     }
962
963     s->bit_rate = avctx->bit_rate;
964     s->width = avctx->width;
965     s->height = avctx->height;
966     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
967         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
968         avctx->gop_size=600;
969     }
970     s->gop_size = avctx->gop_size;
971     s->avctx = avctx;
972     s->flags= avctx->flags;
973     s->flags2= avctx->flags2;
974     s->max_b_frames= avctx->max_b_frames;
975     s->codec_id= avctx->codec->id;
976     s->luma_elim_threshold  = avctx->luma_elim_threshold;
977     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
978     s->strict_std_compliance= avctx->strict_std_compliance;
979     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
980     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
981     s->mpeg_quant= avctx->mpeg_quant;
982     s->rtp_mode= !!avctx->rtp_payload_size;
983     s->intra_dc_precision= avctx->intra_dc_precision;
984     s->user_specified_pts = AV_NOPTS_VALUE;
985
986     if (s->gop_size <= 1) {
987         s->intra_only = 1;
988         s->gop_size = 12;
989     } else {
990         s->intra_only = 0;
991     }
992
993     s->me_method = avctx->me_method;
994
995     /* Fixed QSCALE */
996     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
997
998     s->adaptive_quant= (   s->avctx->lumi_masking
999                         || s->avctx->dark_masking
1000                         || s->avctx->temporal_cplx_masking
1001                         || s->avctx->spatial_cplx_masking
1002                         || s->avctx->p_masking
1003                         || s->avctx->border_masking
1004                         || (s->flags&CODEC_FLAG_QP_RD))
1005                        && !s->fixed_qscale;
1006
1007     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1008     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1009     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1010     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1011     s->q_scale_type= !!(s->flags2 & CODEC_FLAG2_NON_LINEAR_QUANT);
1012
1013     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1014         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
1015         return -1;
1016     }
1017
1018     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1019         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1020     }
1021
1022     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1023         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
1024         return -1;
1025     }
1026
1027     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1028         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1029         return -1;
1030     }
1031
1032     if(avctx->rc_buffer_size && avctx->bit_rate*av_q2d(avctx->time_base) > avctx->rc_buffer_size){
1033         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
1034         return -1;
1035     }
1036
1037     if(avctx->bit_rate*av_q2d(avctx->time_base) > avctx->bit_rate_tolerance){
1038         av_log(avctx, AV_LOG_ERROR, "bitrate tolerance too small for bitrate\n");
1039         return -1;
1040     }
1041
1042     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1043        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1044        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1045
1046         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1047     }
1048
1049     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1050        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1051         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1052         return -1;
1053     }
1054
1055     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1056         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1057         return -1;
1058     }
1059
1060     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1061         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1062         return -1;
1063     }
1064
1065     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1066         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1067         return -1;
1068     }
1069
1070     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1071         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1072         return -1;
1073     }
1074
1075     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1076         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1077         return -1;
1078     }
1079
1080     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1081        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1082         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1083         return -1;
1084     }
1085
1086     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1087         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1088         return -1;
1089     }
1090
1091     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1092         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1093         return -1;
1094     }
1095
1096     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1097         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1098         return -1;
1099     }
1100
1101     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1102         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet, set threshold to 1000000000\n");
1103         return -1;
1104     }
1105
1106     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1107         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1108         return -1;
1109     }
1110
1111     if(s->flags & CODEC_FLAG_LOW_DELAY){
1112         if (s->codec_id != CODEC_ID_MPEG2VIDEO && s->codec_id != CODEC_ID_MPEG1VIDEO){
1113             av_log(avctx, AV_LOG_ERROR, "low delay forcing is only available for mpeg1/2\n");
1114             return -1;
1115         }
1116         if (s->max_b_frames != 0){
1117             av_log(avctx, AV_LOG_ERROR, "b frames cannot be used with low delay\n");
1118             return -1;
1119         }
1120     }
1121
1122     if(s->q_scale_type == 1){
1123         if(s->codec_id != CODEC_ID_MPEG2VIDEO){
1124             av_log(avctx, AV_LOG_ERROR, "non linear quant is only available for mpeg2\n");
1125             return -1;
1126         }
1127         if(avctx->qmax > 12){
1128             av_log(avctx, AV_LOG_ERROR, "non linear quant only supports qmax <= 12 currently\n");
1129             return -1;
1130         }
1131     }
1132
1133     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1134        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1135        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1136         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1137         return -1;
1138     }
1139
1140     if(s->avctx->thread_count > 1)
1141         s->rtp_mode= 1;
1142
1143     if(!avctx->time_base.den || !avctx->time_base.num){
1144         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1145         return -1;
1146     }
1147
1148     i= (INT_MAX/2+128)>>8;
1149     if(avctx->me_threshold >= i){
1150         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1151         return -1;
1152     }
1153     if(avctx->mb_threshold >= i){
1154         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1155         return -1;
1156     }
1157
1158     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1159         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1160         avctx->b_frame_strategy = 0;
1161     }
1162
1163     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1164     if(i > 1){
1165         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1166         avctx->time_base.den /= i;
1167         avctx->time_base.num /= i;
1168 //        return -1;
1169     }
1170
1171     if(s->codec_id==CODEC_ID_MJPEG){
1172         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1173         s->inter_quant_bias= 0;
1174     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1175         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1176         s->inter_quant_bias= 0;
1177     }else{
1178         s->intra_quant_bias=0;
1179         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1180     }
1181
1182     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1183         s->intra_quant_bias= avctx->intra_quant_bias;
1184     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1185         s->inter_quant_bias= avctx->inter_quant_bias;
1186
1187     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1188
1189     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1190         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1191         return -1;
1192     }
1193     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1194
1195     switch(avctx->codec->id) {
1196     case CODEC_ID_MPEG1VIDEO:
1197         s->out_format = FMT_MPEG1;
1198         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1199         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1200         break;
1201     case CODEC_ID_MPEG2VIDEO:
1202         s->out_format = FMT_MPEG1;
1203         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1204         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1205         s->rtp_mode= 1;
1206         break;
1207     case CODEC_ID_LJPEG:
1208     case CODEC_ID_MJPEG:
1209         s->out_format = FMT_MJPEG;
1210         s->intra_only = 1; /* force intra only for jpeg */
1211         s->mjpeg_vsample[0] = 2;
1212         s->mjpeg_vsample[1] = 2>>chroma_v_shift;
1213         s->mjpeg_vsample[2] = 2>>chroma_v_shift;
1214         s->mjpeg_hsample[0] = 2;
1215         s->mjpeg_hsample[1] = 2>>chroma_h_shift;
1216         s->mjpeg_hsample[2] = 2>>chroma_h_shift;
1217         if (!(ENABLE_MJPEG_ENCODER || ENABLE_LJPEG_ENCODER)
1218             || ff_mjpeg_encode_init(s) < 0)
1219             return -1;
1220         avctx->delay=0;
1221         s->low_delay=1;
1222         break;
1223     case CODEC_ID_H261:
1224         if (!ENABLE_H261_ENCODER)  return -1;
1225         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
1226             av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.261 codec.\nValid sizes are 176x144, 352x288\n", s->width, s->height);
1227             return -1;
1228         }
1229         s->out_format = FMT_H261;
1230         avctx->delay=0;
1231         s->low_delay=1;
1232         break;
1233     case CODEC_ID_H263:
1234         if (h263_get_picture_format(s->width, s->height) == 7) {
1235             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1236             return -1;
1237         }
1238         s->out_format = FMT_H263;
1239         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1240         avctx->delay=0;
1241         s->low_delay=1;
1242         break;
1243     case CODEC_ID_H263P:
1244         s->out_format = FMT_H263;
1245         s->h263_plus = 1;
1246         /* Fx */
1247         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1248         s->h263_aic= (avctx->flags & CODEC_FLAG_AC_PRED) ? 1:0;
1249         s->modified_quant= s->h263_aic;
1250         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1251         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1252         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1253         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1254         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1255
1256         /* /Fx */
1257         /* These are just to be sure */
1258         avctx->delay=0;
1259         s->low_delay=1;
1260         break;
1261     case CODEC_ID_FLV1:
1262         s->out_format = FMT_H263;
1263         s->h263_flv = 2; /* format = 1; 11-bit codes */
1264         s->unrestricted_mv = 1;
1265         s->rtp_mode=0; /* don't allow GOB */
1266         avctx->delay=0;
1267         s->low_delay=1;
1268         break;
1269     case CODEC_ID_RV10:
1270         s->out_format = FMT_H263;
1271         avctx->delay=0;
1272         s->low_delay=1;
1273         break;
1274     case CODEC_ID_RV20:
1275         s->out_format = FMT_H263;
1276         avctx->delay=0;
1277         s->low_delay=1;
1278         s->modified_quant=1;
1279         s->h263_aic=1;
1280         s->h263_plus=1;
1281         s->loop_filter=1;
1282         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1283         break;
1284     case CODEC_ID_MPEG4:
1285         s->out_format = FMT_H263;
1286         s->h263_pred = 1;
1287         s->unrestricted_mv = 1;
1288         s->low_delay= s->max_b_frames ? 0 : 1;
1289         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1290         break;
1291     case CODEC_ID_MSMPEG4V1:
1292         s->out_format = FMT_H263;
1293         s->h263_msmpeg4 = 1;
1294         s->h263_pred = 1;
1295         s->unrestricted_mv = 1;
1296         s->msmpeg4_version= 1;
1297         avctx->delay=0;
1298         s->low_delay=1;
1299         break;
1300     case CODEC_ID_MSMPEG4V2:
1301         s->out_format = FMT_H263;
1302         s->h263_msmpeg4 = 1;
1303         s->h263_pred = 1;
1304         s->unrestricted_mv = 1;
1305         s->msmpeg4_version= 2;
1306         avctx->delay=0;
1307         s->low_delay=1;
1308         break;
1309     case CODEC_ID_MSMPEG4V3:
1310         s->out_format = FMT_H263;
1311         s->h263_msmpeg4 = 1;
1312         s->h263_pred = 1;
1313         s->unrestricted_mv = 1;
1314         s->msmpeg4_version= 3;
1315         s->flipflop_rounding=1;
1316         avctx->delay=0;
1317         s->low_delay=1;
1318         break;
1319     case CODEC_ID_WMV1:
1320         s->out_format = FMT_H263;
1321         s->h263_msmpeg4 = 1;
1322         s->h263_pred = 1;
1323         s->unrestricted_mv = 1;
1324         s->msmpeg4_version= 4;
1325         s->flipflop_rounding=1;
1326         avctx->delay=0;
1327         s->low_delay=1;
1328         break;
1329     case CODEC_ID_WMV2:
1330         s->out_format = FMT_H263;
1331         s->h263_msmpeg4 = 1;
1332         s->h263_pred = 1;
1333         s->unrestricted_mv = 1;
1334         s->msmpeg4_version= 5;
1335         s->flipflop_rounding=1;
1336         avctx->delay=0;
1337         s->low_delay=1;
1338         break;
1339     default:
1340         return -1;
1341     }
1342
1343     avctx->has_b_frames= !s->low_delay;
1344
1345     s->encoding = 1;
1346
1347     /* init */
1348     if (MPV_common_init(s) < 0)
1349         return -1;
1350
1351     if(s->modified_quant)
1352         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1353     s->progressive_frame=
1354     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN));
1355     s->quant_precision=5;
1356
1357     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1358     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1359
1360     if (ENABLE_H261_ENCODER && s->out_format == FMT_H261)
1361         ff_h261_encode_init(s);
1362     if (s->out_format == FMT_H263)
1363         h263_encode_init(s);
1364     if (ENABLE_MSMPEG4_ENCODER && s->msmpeg4_version)
1365         ff_msmpeg4_encode_init(s);
1366     if (s->out_format == FMT_MPEG1)
1367         ff_mpeg1_encode_init(s);
1368
1369     /* init q matrix */
1370     for(i=0;i<64;i++) {
1371         int j= s->dsp.idct_permutation[i];
1372         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1373             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1374             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1375         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1376             s->intra_matrix[j] =
1377             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1378         }else
1379         { /* mpeg1/2 */
1380             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1381             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1382         }
1383         if(s->avctx->intra_matrix)
1384             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1385         if(s->avctx->inter_matrix)
1386             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1387     }
1388
1389     /* precompute matrix */
1390     /* for mjpeg, we do include qscale in the matrix */
1391     if (s->out_format != FMT_MJPEG) {
1392         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1393                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1394         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1395                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1396     }
1397
1398     if(ff_rate_control_init(s) < 0)
1399         return -1;
1400
1401     return 0;
1402 }
1403
1404 int MPV_encode_end(AVCodecContext *avctx)
1405 {
1406     MpegEncContext *s = avctx->priv_data;
1407
1408     ff_rate_control_uninit(s);
1409
1410     MPV_common_end(s);
1411     if ((ENABLE_MJPEG_ENCODER || ENABLE_LJPEG_ENCODER) && s->out_format == FMT_MJPEG)
1412         ff_mjpeg_encode_close(s);
1413
1414     av_freep(&avctx->extradata);
1415
1416     return 0;
1417 }
1418
1419 #endif //CONFIG_ENCODERS
1420
1421 void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
1422 {
1423     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1424     uint8_t index_run[MAX_RUN+1];
1425     int last, run, level, start, end, i;
1426
1427     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1428     if(static_store && rl->max_level[0])
1429         return;
1430
1431     /* compute max_level[], max_run[] and index_run[] */
1432     for(last=0;last<2;last++) {
1433         if (last == 0) {
1434             start = 0;
1435             end = rl->last;
1436         } else {
1437             start = rl->last;
1438             end = rl->n;
1439         }
1440
1441         memset(max_level, 0, MAX_RUN + 1);
1442         memset(max_run, 0, MAX_LEVEL + 1);
1443         memset(index_run, rl->n, MAX_RUN + 1);
1444         for(i=start;i<end;i++) {
1445             run = rl->table_run[i];
1446             level = rl->table_level[i];
1447             if (index_run[run] == rl->n)
1448                 index_run[run] = i;
1449             if (level > max_level[run])
1450                 max_level[run] = level;
1451             if (run > max_run[level])
1452                 max_run[level] = run;
1453         }
1454         if(static_store)
1455             rl->max_level[last] = static_store[last];
1456         else
1457             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1458         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1459         if(static_store)
1460             rl->max_run[last] = static_store[last] + MAX_RUN + 1;
1461         else
1462             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1463         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1464         if(static_store)
1465             rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
1466         else
1467             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1468         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1469     }
1470 }
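/*
 * Illustrative sketch (not part of the build): a codec typically initializes
 * its RL tables once, either into a per-table static store or on the heap.
 * The names my_rl_table and my_rl_store below are hypothetical.
 */
#if 0
static RLTable my_rl_table; /* n, last, table_run and table_level filled in elsewhere */
static uint8_t my_rl_store[2][2*MAX_RUN + MAX_LEVEL + 3]; /* per 'last': max_level, max_run, index_run */

static void my_init_rl_tables(void)
{
    init_rl(&my_rl_table, my_rl_store); /* a second call is a no-op once max_level[0] is set */
}
#endif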
1471
1472 /* draw the edges of width 'w' of an image of size width, height */
1473 //FIXME check that this is ok for mpeg4 interlaced
1474 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1475 {
1476     uint8_t *ptr, *last_line;
1477     int i;
1478
1479     last_line = buf + (height - 1) * wrap;
1480     for(i=0;i<w;i++) {
1481         /* top and bottom */
1482         memcpy(buf - (i + 1) * wrap, buf, width);
1483         memcpy(last_line + (i + 1) * wrap, last_line, width);
1484     }
1485     /* left and right */
1486     ptr = buf;
1487     for(i=0;i<height;i++) {
1488         memset(ptr - w, ptr[0], w);
1489         memset(ptr + width, ptr[width-1], w);
1490         ptr += wrap;
1491     }
1492     /* corners */
1493     for(i=0;i<w;i++) {
1494         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1495         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1496         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
1497         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
1498     }
1499 }
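/*
 * Note: draw_edges_c() assumes buf points at the top-left visible pixel inside
 * a larger allocation with at least w padded rows above/below and w padded
 * columns left/right (the EDGE_WIDTH border of reference frames). A minimal,
 * hypothetical stand-alone use could look like this sketch:
 */
#if 0
{
    int w= 16, h= 16, edge= EDGE_WIDTH;
    int wrap= w + 2*edge;                    /* stride including left/right padding */
    uint8_t *base= av_malloc(wrap*(h + 2*edge));
    uint8_t *pic = base + edge*wrap + edge;  /* top-left of the visible w x h area */
    /* ... fill the visible area ... */
    draw_edges_c(pic, wrap, w, h, edge);     /* replicate border pixels outward */
    av_free(base);
}
#endif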
1500
1501 int ff_find_unused_picture(MpegEncContext *s, int shared){
1502     int i;
1503
1504     if(shared){
1505         for(i=0; i<MAX_PICTURE_COUNT; i++){
1506             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1507         }
1508     }else{
1509         for(i=0; i<MAX_PICTURE_COUNT; i++){
1510             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1511         }
1512         for(i=0; i<MAX_PICTURE_COUNT; i++){
1513             if(s->picture[i].data[0]==NULL) return i;
1514         }
1515     }
1516
1517     assert(0);
1518     return -1;
1519 }
1520
1521 static void update_noise_reduction(MpegEncContext *s){
1522     int intra, i;
1523
1524     for(intra=0; intra<2; intra++){
1525         if(s->dct_count[intra] > (1<<16)){
1526             for(i=0; i<64; i++){
1527                 s->dct_error_sum[intra][i] >>=1;
1528             }
1529             s->dct_count[intra] >>= 1;
1530         }
1531
1532         for(i=0; i<64; i++){
1533             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1534         }
1535     }
1536 }
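/*
 * Worked example (hypothetical numbers): the offset above is approximately
 * noise_reduction * dct_count / dct_error_sum, computed with rounding
 * ("+ dct_error_sum/2") and a "+1" guarding against a zero denominator.
 * With noise_reduction=8, dct_count[intra]=1000 and dct_error_sum[intra][i]=4000,
 * the offset becomes (8*1000 + 2000) / 4001 = 2, so coefficients with a large
 * accumulated error relative to the block count get only a small offset.
 */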
1537
1538 /**
1539  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1540  */
1541 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1542 {
1543     int i;
1544     AVFrame *pic;
1545     s->mb_skipped = 0;
1546
1547     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1548
1549     /* mark&release old frames */
1550     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1551       if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1552         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1553
1554         /* release forgotten pictures */
1555         /* if(mpeg124/h263) */
1556         if(!s->encoding){
1557             for(i=0; i<MAX_PICTURE_COUNT; i++){
1558                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1559                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1560                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1561                 }
1562             }
1563         }
1564       }
1565     }
1566 alloc:
1567     if(!s->encoding){
1568         /* release non reference frames */
1569         for(i=0; i<MAX_PICTURE_COUNT; i++){
1570             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1571                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1572             }
1573         }
1574
1575         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1576             pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
1577         else{
1578             i= ff_find_unused_picture(s, 0);
1579             pic= (AVFrame*)&s->picture[i];
1580         }
1581
1582         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1583                         && !s->dropable ? 3 : 0;
1584
1585         pic->coded_picture_number= s->coded_picture_number++;
1586
1587         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1588             return -1;
1589
1590         s->current_picture_ptr= (Picture*)pic;
1591         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1592         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1593     }
1594
1595     s->current_picture_ptr->pict_type= s->pict_type;
1596 //    if(s->flags && CODEC_FLAG_QSCALE)
1597   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1598     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1599
1600     copy_picture(&s->current_picture, s->current_picture_ptr);
1601
1602     if (s->pict_type != B_TYPE) {
1603         s->last_picture_ptr= s->next_picture_ptr;
1604         if(!s->dropable)
1605             s->next_picture_ptr= s->current_picture_ptr;
1606     }
1607 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1608         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1609         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1610         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1611         s->pict_type, s->dropable);*/
1612
1613     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1614     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1615
1616     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && !s->dropable){
1617         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1618         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1619         goto alloc;
1620     }
1621
1622     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1623
1624     if(s->picture_structure!=PICT_FRAME){
1625         int i;
1626         for(i=0; i<4; i++){
1627             if(s->picture_structure == PICT_BOTTOM_FIELD){
1628                  s->current_picture.data[i] += s->current_picture.linesize[i];
1629             }
1630             s->current_picture.linesize[i] *= 2;
1631             s->last_picture.linesize[i] *=2;
1632             s->next_picture.linesize[i] *=2;
1633         }
1634     }
1635
1636     s->hurry_up= s->avctx->hurry_up;
1637     s->error_resilience= avctx->error_resilience;
1638
1639     /* set the dequantizer; we cannot do this during init as it might change for mpeg4,
1640        and we cannot do it in the header decode as init is not called for mpeg4 there yet */
1641     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1642         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1643         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1644     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1645         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1646         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1647     }else{
1648         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1649         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1650     }
1651
1652     if(s->dct_error_sum){
1653         assert(s->avctx->noise_reduction && s->encoding);
1654
1655         update_noise_reduction(s);
1656     }
1657
1658 #ifdef HAVE_XVMC
1659     if(s->avctx->xvmc_acceleration)
1660         return XVMC_field_start(s, avctx);
1661 #endif
1662     return 0;
1663 }
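/*
 * Illustrative call sequence (sketch, not part of the build): encoder and
 * decoders bracket the per-frame work with MPV_frame_start()/MPV_frame_end();
 * the loop body below is only a placeholder.
 */
#if 0
if (MPV_frame_start(s, avctx) < 0)
    return -1;
/* ... code or decode every macroblock of the frame ... */
MPV_frame_end(s);
#endif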
1664
1665 /* generic function for encode/decode called after a frame has been coded/decoded */
1666 void MPV_frame_end(MpegEncContext *s)
1667 {
1668     int i;
1669     /* draw edge for correct motion prediction if outside */
1670 #ifdef HAVE_XVMC
1671 //just to make sure that all data is rendered.
1672     if(s->avctx->xvmc_acceleration){
1673         XVMC_field_end(s);
1674     }else
1675 #endif
1676     if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1677             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1678             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1679             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1680     }
1681     emms_c();
1682
1683     s->last_pict_type    = s->pict_type;
1684     s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
1685     if(s->pict_type!=B_TYPE){
1686         s->last_non_b_pict_type= s->pict_type;
1687     }
1688 #if 0
1689         /* copy back current_picture variables */
1690     for(i=0; i<MAX_PICTURE_COUNT; i++){
1691         if(s->picture[i].data[0] == s->current_picture.data[0]){
1692             s->picture[i]= s->current_picture;
1693             break;
1694         }
1695     }
1696     assert(i<MAX_PICTURE_COUNT);
1697 #endif
1698
1699     if(s->encoding){
1700         /* release non-reference frames */
1701         for(i=0; i<MAX_PICTURE_COUNT; i++){
1702             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1703                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1704             }
1705         }
1706     }
1707     // clear copies, to avoid confusion
1708 #if 0
1709     memset(&s->last_picture, 0, sizeof(Picture));
1710     memset(&s->next_picture, 0, sizeof(Picture));
1711     memset(&s->current_picture, 0, sizeof(Picture));
1712 #endif
1713     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1714 }
1715
1716 /**
1717  * draws a line from (ex, ey) -> (sx, sy).
1718  * @param w width of the image
1719  * @param h height of the image
1720  * @param stride stride/linesize of the image
1721  * @param color color of the line
1722  */
1723 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1724     int x, y, fr, f;
1725
1726     sx= av_clip(sx, 0, w-1);
1727     sy= av_clip(sy, 0, h-1);
1728     ex= av_clip(ex, 0, w-1);
1729     ey= av_clip(ey, 0, h-1);
1730
1731     buf[sy*stride + sx]+= color;
1732
1733     if(FFABS(ex - sx) > FFABS(ey - sy)){
1734         if(sx > ex){
1735             FFSWAP(int, sx, ex);
1736             FFSWAP(int, sy, ey);
1737         }
1738         buf+= sx + sy*stride;
1739         ex-= sx;
1740         f= ((ey-sy)<<16)/ex;
1741         for(x= 0; x <= ex; x++){
1742             y = (x*f)>>16;
1743             fr= (x*f)&0xFFFF;
1744             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1745             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1746         }
1747     }else{
1748         if(sy > ey){
1749             FFSWAP(int, sx, ex);
1750             FFSWAP(int, sy, ey);
1751         }
1752         buf+= sx + sy*stride;
1753         ey-= sy;
1754         if(ey) f= ((ex-sx)<<16)/ey;
1755         else   f= 0;
1756         for(y= 0; y <= ey; y++){
1757             x = (y*f)>>16;
1758             fr= (y*f)&0xFFFF;
1759             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1760             buf[y*stride + x+1]+= (color*         fr )>>16;
1761         }
1762     }
1763 }
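/*
 * Worked example (hypothetical numbers): draw_line() walks the major axis with
 * a 16.16 fixed-point slope. For dx=8, dy=3 the slope is f=(3<<16)/8=24576;
 * at x=5, x*f=122880 = 1*65536 + 57344, so the pixel at y=1 gets
 * color*(65536-57344)/65536 and the pixel at y=2 gets color*57344/65536,
 * matching the exact y of 1.875 -- a simple two-tap anti-aliasing.
 */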
1764
1765 /**
1766  * draws an arrow from (ex, ey) -> (sx, sy).
1767  * @param w width of the image
1768  * @param h height of the image
1769  * @param stride stride/linesize of the image
1770  * @param color color of the arrow
1771  */
1772 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1773     int dx,dy;
1774
1775     sx= av_clip(sx, -100, w+100);
1776     sy= av_clip(sy, -100, h+100);
1777     ex= av_clip(ex, -100, w+100);
1778     ey= av_clip(ey, -100, h+100);
1779
1780     dx= ex - sx;
1781     dy= ey - sy;
1782
1783     if(dx*dx + dy*dy > 3*3){
1784         int rx=  dx + dy;
1785         int ry= -dx + dy;
1786         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1787
1788         //FIXME subpixel accuracy
1789         rx= ROUNDED_DIV(rx*3<<4, length);
1790         ry= ROUNDED_DIV(ry*3<<4, length);
1791
1792         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1793         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1794     }
1795     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1796 }
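/*
 * Note: the arrowhead is built from (rx,ry) = (dx+dy, -dx+dy), the direction
 * vector rotated by 45 degrees, and from its 90-degree rotation (-ry,rx);
 * ROUNDED_DIV() against length then scales both barbs to roughly 3 pixels
 * regardless of how long the motion vector is.
 */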
1797
1798 /**
1799  * prints debugging info for the given picture.
1800  */
1801 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1802
1803     if(!pict || !pict->mb_type) return;
1804
1805     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1806         int x,y;
1807
1808         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1809         switch (pict->pict_type) {
1810             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1811             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1812             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1813             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1814             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1815             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1816         }
1817         for(y=0; y<s->mb_height; y++){
1818             for(x=0; x<s->mb_width; x++){
1819                 if(s->avctx->debug&FF_DEBUG_SKIP){
1820                     int count= s->mbskip_table[x + y*s->mb_stride];
1821                     if(count>9) count=9;
1822                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1823                 }
1824                 if(s->avctx->debug&FF_DEBUG_QP){
1825                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1826                 }
1827                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1828                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1829                     //Type & MV direction
1830                     if(IS_PCM(mb_type))
1831                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1832                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1833                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1834                     else if(IS_INTRA4x4(mb_type))
1835                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1836                     else if(IS_INTRA16x16(mb_type))
1837                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1838                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1839                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1840                     else if(IS_DIRECT(mb_type))
1841                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1842                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1843                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1844                     else if(IS_GMC(mb_type))
1845                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1846                     else if(IS_SKIP(mb_type))
1847                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1848                     else if(!USES_LIST(mb_type, 1))
1849                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1850                     else if(!USES_LIST(mb_type, 0))
1851                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1852                     else{
1853                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1854                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1855                     }
1856
1857                     //segmentation
1858                     if(IS_8X8(mb_type))
1859                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1860                     else if(IS_16X8(mb_type))
1861                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1862                     else if(IS_8X16(mb_type))
1863                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1864                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1865                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1866                     else
1867                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1868
1869
1870                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1871                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1872                     else
1873                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1874                 }
1875 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1876             }
1877             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1878         }
1879     }
1880
1881     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1882         const int shift= 1 + s->quarter_sample;
1883         int mb_y;
1884         uint8_t *ptr;
1885         int i;
1886         int h_chroma_shift, v_chroma_shift;
1887         const int width = s->avctx->width;
1888         const int height= s->avctx->height;
1889         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1890         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1891         s->low_delay=0; //needed to see the vectors without trashing the buffers
1892
1893         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1894         for(i=0; i<3; i++){
1895             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1896             pict->data[i]= s->visualization_buffer[i];
1897         }
1898         pict->type= FF_BUFFER_TYPE_COPY;
1899         ptr= pict->data[0];
1900
1901         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1902             int mb_x;
1903             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1904                 const int mb_index= mb_x + mb_y*s->mb_stride;
1905                 if((s->avctx->debug_mv) && pict->motion_val){
1906                   int type;
1907                   for(type=0; type<3; type++){
1908                     int direction = 0;
1909                     switch (type) {
1910                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1911                                 continue;
1912                               direction = 0;
1913                               break;
1914                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1915                                 continue;
1916                               direction = 0;
1917                               break;
1918                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1919                                 continue;
1920                               direction = 1;
1921                               break;
1922                     }
1923                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1924                         continue;
1925
1926                     if(IS_8X8(pict->mb_type[mb_index])){
1927                       int i;
1928                       for(i=0; i<4; i++){
1929                         int sx= mb_x*16 + 4 + 8*(i&1);
1930                         int sy= mb_y*16 + 4 + 8*(i>>1);
1931                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1932                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1933                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1934                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1935                       }
1936                     }else if(IS_16X8(pict->mb_type[mb_index])){
1937                       int i;
1938                       for(i=0; i<2; i++){
1939                         int sx=mb_x*16 + 8;
1940                         int sy=mb_y*16 + 4 + 8*i;
1941                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1942                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1943                         int my=(pict->motion_val[direction][xy][1]>>shift);
1944
1945                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1946                             my*=2;
1947
1948                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1949                       }
1950                     }else if(IS_8X16(pict->mb_type[mb_index])){
1951                       int i;
1952                       for(i=0; i<2; i++){
1953                         int sx=mb_x*16 + 4 + 8*i;
1954                         int sy=mb_y*16 + 8;
1955                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1956                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1957                         int my=(pict->motion_val[direction][xy][1]>>shift);
1958
1959                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1960                             my*=2;
1961
1962                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1963                       }
1964                     }else{
1965                       int sx= mb_x*16 + 8;
1966                       int sy= mb_y*16 + 8;
1967                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1968                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1969                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1970                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1971                     }
1972                   }
1973                 }
1974                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1975                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1976                     int y;
1977                     for(y=0; y<8; y++){
1978                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1979                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1980                     }
1981                 }
1982                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1983                     int mb_type= pict->mb_type[mb_index];
1984                     uint64_t u,v;
1985                     int y;
1986 #define COLOR(theta, r)\
1987 u= (int)(128 + r*cos(theta*3.141592/180));\
1988 v= (int)(128 + r*sin(theta*3.141592/180));
1989
1990
1991                     u=v=128;
1992                     if(IS_PCM(mb_type)){
1993                         COLOR(120,48)
1994                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1995                         COLOR(30,48)
1996                     }else if(IS_INTRA4x4(mb_type)){
1997                         COLOR(90,48)
1998                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1999 //                        COLOR(120,48)
2000                     }else if(IS_DIRECT(mb_type)){
2001                         COLOR(150,48)
2002                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
2003                         COLOR(170,48)
2004                     }else if(IS_GMC(mb_type)){
2005                         COLOR(190,48)
2006                     }else if(IS_SKIP(mb_type)){
2007 //                        COLOR(180,48)
2008                     }else if(!USES_LIST(mb_type, 1)){
2009                         COLOR(240,48)
2010                     }else if(!USES_LIST(mb_type, 0)){
2011                         COLOR(0,48)
2012                     }else{
2013                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
2014                         COLOR(300,48)
2015                     }
2016
2017                     u*= 0x0101010101010101ULL;
2018                     v*= 0x0101010101010101ULL;
2019                     for(y=0; y<8; y++){
2020                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
2021                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
2022                     }
2023
2024                     //segmentation
2025                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
2026                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2027                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2028                     }
2029                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
2030                         for(y=0; y<16; y++)
2031                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
2032                     }
2033                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
2034                         int dm= 1 << (mv_sample_log2-2);
2035                         for(i=0; i<4; i++){
2036                             int sx= mb_x*16 + 8*(i&1);
2037                             int sy= mb_y*16 + 8*(i>>1);
2038                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
2039                             //FIXME bidir
2040                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
2041                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
2042                                 for(y=0; y<8; y++)
2043                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
2044                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
2045                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
2046                         }
2047                     }
2048
2049                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
2050                         // hmm
2051                     }
2052                 }
2053                 s->mbskip_table[mb_index]=0;
2054             }
2055         }
2056     }
2057 }
2058
2059 #ifdef CONFIG_ENCODERS
2060
2061 static int get_sae(uint8_t *src, int ref, int stride){
2062     int x,y;
2063     int acc=0;
2064
2065     for(y=0; y<16; y++){
2066         for(x=0; x<16; x++){
2067             acc+= FFABS(src[x+y*stride] - ref);
2068         }
2069     }
2070
2071     return acc;
2072 }
2073
2074 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2075     int x, y, w, h;
2076     int acc=0;
2077
2078     w= s->width &~15;
2079     h= s->height&~15;
2080
2081     for(y=0; y<h; y+=16){
2082         for(x=0; x<w; x+=16){
2083             int offset= x + y*stride;
2084             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2085             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2086             int sae = get_sae(src + offset, mean, stride);
2087
2088             acc+= sae + 500 < sad;
2089         }
2090     }
2091     return acc;
2092 }
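/*
 * Note: for every 16x16 block, the SAD against the reference is compared with
 * the sum of absolute differences from the block's own mean (get_sae), a rough
 * stand-in for the intra coding cost. A block counts as intra when
 * sae + 500 < sad, i.e. when inter prediction is clearly worse; the resulting
 * count drives b_frame_strategy 1 in select_input_picture() below.
 */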
2093
2094
2095 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
2096     AVFrame *pic=NULL;
2097     int64_t pts;
2098     int i;
2099     const int encoding_delay= s->max_b_frames;
2100     int direct=1;
2101
2102     if(pic_arg){
2103         pts= pic_arg->pts;
2104         pic_arg->display_picture_number= s->input_picture_number++;
2105
2106         if(pts != AV_NOPTS_VALUE){
2107             if(s->user_specified_pts != AV_NOPTS_VALUE){
2108                 int64_t time= pts;
2109                 int64_t last= s->user_specified_pts;
2110
2111                 if(time <= last){
2112                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
2113                     return -1;
2114                 }
2115             }
2116             s->user_specified_pts= pts;
2117         }else{
2118             if(s->user_specified_pts != AV_NOPTS_VALUE){
2119                 s->user_specified_pts=
2120                 pts= s->user_specified_pts + 1;
2121                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
2122             }else{
2123                 pts= pic_arg->display_picture_number;
2124             }
2125         }
2126     }
2127
2128   if(pic_arg){
2129     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2130     if(pic_arg->linesize[0] != s->linesize) direct=0;
2131     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2132     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2133
2134 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2135
2136     if(direct){
2137         i= ff_find_unused_picture(s, 1);
2138
2139         pic= (AVFrame*)&s->picture[i];
2140         pic->reference= 3;
2141
2142         for(i=0; i<4; i++){
2143             pic->data[i]= pic_arg->data[i];
2144             pic->linesize[i]= pic_arg->linesize[i];
2145         }
2146         alloc_picture(s, (Picture*)pic, 1);
2147     }else{
2148         i= ff_find_unused_picture(s, 0);
2149
2150         pic= (AVFrame*)&s->picture[i];
2151         pic->reference= 3;
2152
2153         alloc_picture(s, (Picture*)pic, 0);
2154
2155         if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
2156            && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
2157            && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
2158        // empty
2159         }else{
2160             int h_chroma_shift, v_chroma_shift;
2161             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2162
2163             for(i=0; i<3; i++){
2164                 int src_stride= pic_arg->linesize[i];
2165                 int dst_stride= i ? s->uvlinesize : s->linesize;
2166                 int h_shift= i ? h_chroma_shift : 0;
2167                 int v_shift= i ? v_chroma_shift : 0;
2168                 int w= s->width >>h_shift;
2169                 int h= s->height>>v_shift;
2170                 uint8_t *src= pic_arg->data[i];
2171                 uint8_t *dst= pic->data[i];
2172
2173                 if(!s->avctx->rc_buffer_size)
2174                     dst +=INPLACE_OFFSET;
2175
2176                 if(src_stride==dst_stride)
2177                     memcpy(dst, src, src_stride*h);
2178                 else{
2179                     while(h--){
2180                         memcpy(dst, src, w);
2181                         dst += dst_stride;
2182                         src += src_stride;
2183                     }
2184                 }
2185             }
2186         }
2187     }
2188     copy_picture_attributes(s, pic, pic_arg);
2189     pic->pts= pts; //we set this here to avoid modifying pic_arg
2190   }
2191
2192     /* shift buffer entries */
2193     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2194         s->input_picture[i-1]= s->input_picture[i];
2195
2196     s->input_picture[encoding_delay]= (Picture*)pic;
2197
2198     return 0;
2199 }
2200
2201 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2202     int x, y, plane;
2203     int score=0;
2204     int64_t score64=0;
2205
2206     for(plane=0; plane<3; plane++){
2207         const int stride= p->linesize[plane];
2208         const int bw= plane ? 1 : 2;
2209         for(y=0; y<s->mb_height*bw; y++){
2210             for(x=0; x<s->mb_width*bw; x++){
2211                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2212                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2213
2214                 switch(s->avctx->frame_skip_exp){
2215                     case 0: score= FFMAX(score, v); break;
2216                     case 1: score+= FFABS(v);break;
2217                     case 2: score+= v*v;break;
2218                     case 3: score64+= FFABS(v*v*(int64_t)v);break;
2219                     case 4: score64+= v*v*(int64_t)(v*v);break;
2220                 }
2221             }
2222         }
2223     }
2224
2225     if(score) score64= score;
2226
2227     if(score64 < s->avctx->frame_skip_threshold)
2228         return 1;
2229     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2230         return 1;
2231     return 0;
2232 }
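/*
 * Note: frame_skip_exp selects the norm used to accumulate the per-block
 * differences v: 0 = maximum, 1 = sum of |v|, 2 = sum of v^2, 3 = sum of |v|^3
 * and 4 = sum of v^4 (the last two in 64 bits). The frame is skipped when the
 * score stays below frame_skip_threshold or below the lambda-scaled
 * frame_skip_factor term.
 */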
2233
2234 static int estimate_best_b_count(MpegEncContext *s){
2235     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2236     AVCodecContext *c= avcodec_alloc_context();
2237     AVFrame input[FF_MAX_B_FRAMES+2];
2238     const int scale= s->avctx->brd_scale;
2239     int i, j, out_size, p_lambda, b_lambda, lambda2;
2240     int outbuf_size= s->width * s->height; //FIXME
2241     uint8_t *outbuf= av_malloc(outbuf_size);
2242     int64_t best_rd= INT64_MAX;
2243     int best_b_count= -1;
2244
2245     assert(scale>=0 && scale <=3);
2246
2247 //    emms_c();
2248     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2249     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2250     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2251     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2252
2253     c->width = s->width >> scale;
2254     c->height= s->height>> scale;
2255     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2256     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2257     c->mb_decision= s->avctx->mb_decision;
2258     c->me_cmp= s->avctx->me_cmp;
2259     c->mb_cmp= s->avctx->mb_cmp;
2260     c->me_sub_cmp= s->avctx->me_sub_cmp;
2261     c->pix_fmt = PIX_FMT_YUV420P;
2262     c->time_base= s->avctx->time_base;
2263     c->max_b_frames= s->max_b_frames;
2264
2265     if (avcodec_open(c, codec) < 0)
2266         return -1;
2267
2268     for(i=0; i<s->max_b_frames+2; i++){
2269         int ysize= c->width*c->height;
2270         int csize= (c->width/2)*(c->height/2);
2271         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2272
2273         avcodec_get_frame_defaults(&input[i]);
2274         input[i].data[0]= av_malloc(ysize + 2*csize);
2275         input[i].data[1]= input[i].data[0] + ysize;
2276         input[i].data[2]= input[i].data[1] + csize;
2277         input[i].linesize[0]= c->width;
2278         input[i].linesize[1]=
2279         input[i].linesize[2]= c->width/2;
2280
2281         if(pre_input_ptr && (!i || s->input_picture[i-1])) {
2282             pre_input= *pre_input_ptr;
2283
2284             if(pre_input.type != FF_BUFFER_TYPE_SHARED && i) {
2285                 pre_input.data[0]+=INPLACE_OFFSET;
2286                 pre_input.data[1]+=INPLACE_OFFSET;
2287                 pre_input.data[2]+=INPLACE_OFFSET;
2288             }
2289
2290             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2291             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2292             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2293         }
2294     }
2295
2296     for(j=0; j<s->max_b_frames+1; j++){
2297         int64_t rd=0;
2298
2299         if(!s->input_picture[j])
2300             break;
2301
2302         c->error[0]= c->error[1]= c->error[2]= 0;
2303
2304         input[0].pict_type= I_TYPE;
2305         input[0].quality= 1 * FF_QP2LAMBDA;
2306         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2307 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2308
2309         for(i=0; i<s->max_b_frames+1; i++){
2310             int is_p= i % (j+1) == j || i==s->max_b_frames;
2311
2312             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2313             input[i+1].quality= is_p ? p_lambda : b_lambda;
2314             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2315             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2316         }
2317
2318         /* get the delayed frames */
2319         while(out_size){
2320             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2321             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2322         }
2323
2324         rd += c->error[0] + c->error[1] + c->error[2];
2325
2326         if(rd < best_rd){
2327             best_rd= rd;
2328             best_b_count= j;
2329         }
2330     }
2331
2332     av_freep(&outbuf);
2333     avcodec_close(c);
2334     av_freep(&c);
2335
2336     for(i=0; i<s->max_b_frames+2; i++){
2337         av_freep(&input[i].data[0]);
2338     }
2339
2340     return best_b_count;
2341 }
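/*
 * Note: estimate_best_b_count() brute-forces the number of consecutive
 * B-frames for b_frame_strategy 2 by re-encoding the buffered input pictures,
 * downscaled by brd_scale, once per candidate count j, and keeps the j with
 * the smallest cost rd = sum(out_size*lambda2) + SSE (the error[] terms
 * gathered via CODEC_FLAG_PSNR).
 */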
2342
2343 static void select_input_picture(MpegEncContext *s){
2344     int i;
2345
2346     for(i=1; i<MAX_PICTURE_COUNT; i++)
2347         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2348     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2349
2350     /* set next picture type & ordering */
2351     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2352         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2353             s->reordered_input_picture[0]= s->input_picture[0];
2354             s->reordered_input_picture[0]->pict_type= I_TYPE;
2355             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2356         }else{
2357             int b_frames;
2358
2359             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2360                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2361                 //FIXME check that the gop check above is +-1 correct
2362 //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2363
2364                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2365                         for(i=0; i<4; i++)
2366                             s->input_picture[0]->data[i]= NULL;
2367                         s->input_picture[0]->type= 0;
2368                     }else{
2369                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2370                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2371
2372                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2373                     }
2374
2375                     emms_c();
2376                     ff_vbv_update(s, 0);
2377
2378                     goto no_output_pic;
2379                 }
2380             }
2381
2382             if(s->flags&CODEC_FLAG_PASS2){
2383                 for(i=0; i<s->max_b_frames+1; i++){
2384                     int pict_num= s->input_picture[0]->display_picture_number + i;
2385
2386                     if(pict_num >= s->rc_context.num_entries)
2387                         break;
2388                     if(!s->input_picture[i]){
2389                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2390                         break;
2391                     }
2392
2393                     s->input_picture[i]->pict_type=
2394                         s->rc_context.entry[pict_num].new_pict_type;
2395                 }
2396             }
2397
2398             if(s->avctx->b_frame_strategy==0){
2399                 b_frames= s->max_b_frames;
2400                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2401             }else if(s->avctx->b_frame_strategy==1){
2402                 for(i=1; i<s->max_b_frames+1; i++){
2403                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2404                         s->input_picture[i]->b_frame_score=
2405                             get_intra_count(s, s->input_picture[i  ]->data[0],
2406                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2407                     }
2408                 }
2409                 for(i=0; i<s->max_b_frames+1; i++){
2410                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
2411                 }
2412
2413                 b_frames= FFMAX(0, i-1);
2414
2415                 /* reset scores */
2416                 for(i=0; i<b_frames+1; i++){
2417                     s->input_picture[i]->b_frame_score=0;
2418                 }
2419             }else if(s->avctx->b_frame_strategy==2){
2420                 b_frames= estimate_best_b_count(s);
2421             }else{
2422                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2423                 b_frames=0;
2424             }
2425
2426             emms_c();
2427 //static int b_count=0;
2428 //b_count+= b_frames;
2429 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2430
2431             for(i= b_frames - 1; i>=0; i--){
2432                 int type= s->input_picture[i]->pict_type;
2433                 if(type && type != B_TYPE)
2434                     b_frames= i;
2435             }
2436             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2437                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2438             }
2439
2440             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2441               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2442                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2443               }else{
2444                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2445                     b_frames=0;
2446                 s->input_picture[b_frames]->pict_type= I_TYPE;
2447               }
2448             }
2449
2450             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2451                && b_frames
2452                && s->input_picture[b_frames]->pict_type== I_TYPE)
2453                 b_frames--;
2454
2455             s->reordered_input_picture[0]= s->input_picture[b_frames];
2456             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2457                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2458             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2459             for(i=0; i<b_frames; i++){
2460                 s->reordered_input_picture[i+1]= s->input_picture[i];
2461                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2462                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2463             }
2464         }
2465     }
2466 no_output_pic:
2467     if(s->reordered_input_picture[0]){
2468         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2469
2470         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2471
2472         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size){
2473             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
2474
2475             int i= ff_find_unused_picture(s, 0);
2476             Picture *pic= &s->picture[i];
2477
2478             pic->reference              = s->reordered_input_picture[0]->reference;
2479             alloc_picture(s, pic, 0);
2480
2481             /* mark us unused / free shared pic */
2482             if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_INTERNAL)
2483                 s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
2484             for(i=0; i<4; i++)
2485                 s->reordered_input_picture[0]->data[i]= NULL;
2486             s->reordered_input_picture[0]->type= 0;
2487
2488             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2489
2490             s->current_picture_ptr= pic;
2491         }else{
2492             // input is not a shared pix -> reuse buffer for current_pix
2493
2494             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2495                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2496
2497             s->current_picture_ptr= s->reordered_input_picture[0];
2498             for(i=0; i<4; i++){
2499                 s->new_picture.data[i]+= INPLACE_OFFSET;
2500             }
2501         }
2502         copy_picture(&s->current_picture, s->current_picture_ptr);
2503
2504         s->picture_number= s->new_picture.display_picture_number;
2505 //printf("dpn:%d\n", s->picture_number);
2506     }else{
2507        memset(&s->new_picture, 0, sizeof(Picture));
2508     }
2509 }
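/*
 * Hypothetical example: with max_b_frames=2 and pictures arriving in display
 * order I0 B1 B2 P3, select_input_picture() emits them in coded order
 * I0 P3 B1 B2 -- the next reference is moved ahead of the B-frames that
 * predict from it, and coded_picture_number is assigned in that coded order.
 */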
2510
2511 int MPV_encode_picture(AVCodecContext *avctx,
2512                        unsigned char *buf, int buf_size, void *data)
2513 {
2514     MpegEncContext *s = avctx->priv_data;
2515     AVFrame *pic_arg = data;
2516     int i, stuffing_count;
2517
2518     for(i=0; i<avctx->thread_count; i++){
2519         int start_y= s->thread_context[i]->start_mb_y;
2520         int   end_y= s->thread_context[i]->  end_mb_y;
2521         int h= s->mb_height;
2522         uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
2523         uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
2524
2525         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2526     }
2527
2528     s->picture_in_gop_number++;
2529
2530     if(load_input_picture(s, pic_arg) < 0)
2531         return -1;
2532
2533     select_input_picture(s);
2534
2535     /* output? */
2536     if(s->new_picture.data[0]){
2537         s->pict_type= s->new_picture.pict_type;
2538 //emms_c();
2539 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2540         MPV_frame_start(s, avctx);
2541 vbv_retry:
2542         if (encode_picture(s, s->picture_number) < 0)
2543             return -1;
2544
2545         avctx->real_pict_num  = s->picture_number;
2546         avctx->header_bits = s->header_bits;
2547         avctx->mv_bits     = s->mv_bits;
2548         avctx->misc_bits   = s->misc_bits;
2549         avctx->i_tex_bits  = s->i_tex_bits;
2550         avctx->p_tex_bits  = s->p_tex_bits;
2551         avctx->i_count     = s->i_count;
2552         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2553         avctx->skip_count  = s->skip_count;
2554
2555         MPV_frame_end(s);
2556
2557         if (ENABLE_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
2558             ff_mjpeg_encode_picture_trailer(s);
2559
2560         if(avctx->rc_buffer_size){
2561             RateControlContext *rcc= &s->rc_context;
2562             int max_size= rcc->buffer_index/3;
2563
2564             if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
2565                 s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
2566                 if(s->adaptive_quant){
2567                     int i;
2568                     for(i=0; i<s->mb_height*s->mb_stride; i++)
2569                         s->lambda_table[i]= FFMAX(s->lambda_table[i]+1, s->lambda_table[i]*(s->qscale+1) / s->qscale);
2570                 }
2571                 s->mb_skipped = 0;        //done in MPV_frame_start()
2572                 if(s->pict_type==P_TYPE){ //done in encode_picture() so we must undo it
2573                     if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
2574                         s->no_rounding ^= 1;
2575                 }
2576                 if(s->pict_type!=B_TYPE){
2577                     s->time_base= s->last_time_base;
2578                     s->last_non_b_time= s->time - s->pp_time;
2579                 }
2580 //                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
2581                 for(i=0; i<avctx->thread_count; i++){
2582                     PutBitContext *pb= &s->thread_context[i]->pb;
2583                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
2584                 }
2585                 goto vbv_retry;
2586             }
2587
2588             assert(s->avctx->rc_max_rate);
2589         }
2590
2591         if(s->flags&CODEC_FLAG_PASS1)
2592             ff_write_pass1_stats(s);
2593
2594         for(i=0; i<4; i++){
2595             s->current_picture_ptr->error[i]= s->current_picture.error[i];
2596             avctx->error[i] += s->current_picture_ptr->error[i];
2597         }
2598
2599         if(s->flags&CODEC_FLAG_PASS1)
2600             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2601         flush_put_bits(&s->pb);
2602         s->frame_bits  = put_bits_count(&s->pb);
2603
2604         stuffing_count= ff_vbv_update(s, s->frame_bits);
2605         if(stuffing_count){
2606             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2607                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2608                 return -1;
2609             }
2610
2611             switch(s->codec_id){
2612             case CODEC_ID_MPEG1VIDEO:
2613             case CODEC_ID_MPEG2VIDEO:
2614                 while(stuffing_count--){
2615                     put_bits(&s->pb, 8, 0);
2616                 }
2617             break;
2618             case CODEC_ID_MPEG4:
2619                 put_bits(&s->pb, 16, 0);
2620                 put_bits(&s->pb, 16, 0x1C3);
2621                 stuffing_count -= 4;
2622                 while(stuffing_count--){
2623                     put_bits(&s->pb, 8, 0xFF);
2624                 }
2625             break;
2626             default:
2627                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2628             }
2629             flush_put_bits(&s->pb);
2630             s->frame_bits  = put_bits_count(&s->pb);
2631         }
2632
2633         /* update mpeg1/2 vbv_delay for CBR */
2634         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2635            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2636             int vbv_delay;
2637
2638             assert(s->repeat_first_field==0);
2639
2640             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2641             assert(vbv_delay < 0xFFFF);
2642
2643             s->vbv_delay_ptr[0] &= 0xF8;
2644             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2645             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2646             s->vbv_delay_ptr[2] &= 0x07;
2647             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2648         }
2649         s->total_bits += s->frame_bits;
2650         avctx->frame_bits  = s->frame_bits;
2651     }else{
2652         assert((pbBufPtr(&s->pb) == s->pb.buf));
2653         s->frame_bits=0;
2654     }
2655     assert((s->frame_bits&7)==0);
2656
2657     return s->frame_bits/8;
2658 }
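/*
 * Note on the vbv_delay patching above: the 16-bit MPEG-1/2 vbv_delay field is
 * spread over three already written header bytes as 3 + 8 + 5 bits -- bits
 * 15..13 go into the low 3 bits of vbv_delay_ptr[0], bits 12..5 fill
 * vbv_delay_ptr[1], and bits 4..0 land in the high 5 bits of vbv_delay_ptr[2],
 * hence the 0xF8 and 0x07 masks before OR-ing the new value in.
 */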
2659
2660 #endif //CONFIG_ENCODERS
2661
2662 static inline void gmc1_motion(MpegEncContext *s,
2663                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2664                                uint8_t **ref_picture)
2665 {
2666     uint8_t *ptr;
2667     int offset, src_x, src_y, linesize, uvlinesize;
2668     int motion_x, motion_y;
2669     int emu=0;
2670
2671     motion_x= s->sprite_offset[0][0];
2672     motion_y= s->sprite_offset[0][1];
2673     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2674     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2675     motion_x<<=(3-s->sprite_warping_accuracy);
2676     motion_y<<=(3-s->sprite_warping_accuracy);
2677     src_x = av_clip(src_x, -16, s->width);
2678     if (src_x == s->width)
2679         motion_x =0;
2680     src_y = av_clip(src_y, -16, s->height);
2681     if (src_y == s->height)
2682         motion_y =0;
2683
2684     linesize = s->linesize;
2685     uvlinesize = s->uvlinesize;
2686
2687     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2688
2689     if(s->flags&CODEC_FLAG_EMU_EDGE){
2690         if(   (unsigned)src_x >= s->h_edge_pos - 17
2691            || (unsigned)src_y >= s->v_edge_pos - 17){
2692             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2693             ptr= s->edge_emu_buffer;
2694         }
2695     }
2696
2697     if((motion_x|motion_y)&7){
2698         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2699         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2700     }else{
2701         int dxy;
2702
2703         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2704         if (s->no_rounding){
2705             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2706         }else{
2707             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2708         }
2709     }
2710
2711     if(s->flags&CODEC_FLAG_GRAY) return;
2712
2713     motion_x= s->sprite_offset[1][0];
2714     motion_y= s->sprite_offset[1][1];
2715     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2716     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2717     motion_x<<=(3-s->sprite_warping_accuracy);
2718     motion_y<<=(3-s->sprite_warping_accuracy);
2719     src_x = av_clip(src_x, -8, s->width>>1);
2720     if (src_x == s->width>>1)
2721         motion_x =0;
2722     src_y = av_clip(src_y, -8, s->height>>1);
2723     if (src_y == s->height>>1)
2724         motion_y =0;
2725
2726     offset = (src_y * uvlinesize) + src_x;
2727     ptr = ref_picture[1] + offset;
2728     if(s->flags&CODEC_FLAG_EMU_EDGE){
2729         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2730            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2731             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2732             ptr= s->edge_emu_buffer;
2733             emu=1;
2734         }
2735     }
2736     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2737
2738     ptr = ref_picture[2] + offset;
2739     if(emu){
2740         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2741         ptr= s->edge_emu_buffer;
2742     }
2743     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2744
2745     return;
2746 }
2747
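/**
 * Global motion compensation for the general case (two or three warping
 * points): each block is warped with the affine transform described by
 * s->sprite_offset and s->sprite_delta, evaluated per pixel by dsp.gmc
 * with clamping at h_edge_pos/v_edge_pos.
 */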
2748 static inline void gmc_motion(MpegEncContext *s,
2749                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2750                                uint8_t **ref_picture)
2751 {
2752     uint8_t *ptr;
2753     int linesize, uvlinesize;
2754     const int a= s->sprite_warping_accuracy;
2755     int ox, oy;
2756
2757     linesize = s->linesize;
2758     uvlinesize = s->uvlinesize;
2759
2760     ptr = ref_picture[0];
2761
2762     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2763     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2764
2765     s->dsp.gmc(dest_y, ptr, linesize, 16,
2766            ox,
2767            oy,
2768            s->sprite_delta[0][0], s->sprite_delta[0][1],
2769            s->sprite_delta[1][0], s->sprite_delta[1][1],
2770            a+1, (1<<(2*a+1)) - s->no_rounding,
2771            s->h_edge_pos, s->v_edge_pos);
2772     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2773            ox + s->sprite_delta[0][0]*8,
2774            oy + s->sprite_delta[1][0]*8,
2775            s->sprite_delta[0][0], s->sprite_delta[0][1],
2776            s->sprite_delta[1][0], s->sprite_delta[1][1],
2777            a+1, (1<<(2*a+1)) - s->no_rounding,
2778            s->h_edge_pos, s->v_edge_pos);
2779
2780     if(s->flags&CODEC_FLAG_GRAY) return;
2781
2782     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2783     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2784
2785     ptr = ref_picture[1];
2786     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2787            ox,
2788            oy,
2789            s->sprite_delta[0][0], s->sprite_delta[0][1],
2790            s->sprite_delta[1][0], s->sprite_delta[1][1],
2791            a+1, (1<<(2*a+1)) - s->no_rounding,
2792            s->h_edge_pos>>1, s->v_edge_pos>>1);
2793
2794     ptr = ref_picture[2];
2795     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2796            ox,
2797            oy,
2798            s->sprite_delta[0][0], s->sprite_delta[0][1],
2799            s->sprite_delta[1][0], s->sprite_delta[1][1],
2800            a+1, (1<<(2*a+1)) - s->no_rounding,
2801            s->h_edge_pos>>1, s->v_edge_pos>>1);
2802 }
2803
2804 /**
2805  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2806  * @param buf destination buffer
2807  * @param src source buffer
2808  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2809  * @param block_w width of block
2810  * @param block_h height of block
2811  * @param src_x x coordinate of the top left sample of the block in the source buffer
2812  * @param src_y y coordinate of the top left sample of the block in the source buffer
2813  * @param w width of the source buffer
2814  * @param h height of the source buffer
2815  */
2816 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
2817                                     int src_x, int src_y, int w, int h){
2818     int x, y;
2819     int start_y, start_x, end_y, end_x;
2820
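    /* If the requested block lies entirely outside the source, shift the
     * start position so that the nearest edge row/column is still read;
     * the replication loops below then fill in the rest. */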
2821     if(src_y>= h){
2822         src+= (h-1-src_y)*linesize;
2823         src_y=h-1;
2824     }else if(src_y<=-block_h){
2825         src+= (1-block_h-src_y)*linesize;
2826         src_y=1-block_h;
2827     }
2828     if(src_x>= w){
2829         src+= (w-1-src_x);
2830         src_x=w-1;
2831     }else if(src_x<=-block_w){
2832         src+= (1-block_w-src_x);
2833         src_x=1-block_w;
2834     }
2835
2836     start_y= FFMAX(0, -src_y);
2837     start_x= FFMAX(0, -src_x);
2838     end_y= FFMIN(block_h, h-src_y);
2839     end_x= FFMIN(block_w, w-src_x);
2840
2841     // copy existing part
2842     for(y=start_y; y<end_y; y++){
2843         for(x=start_x; x<end_x; x++){
2844             buf[x + y*linesize]= src[x + y*linesize];
2845         }
2846     }
2847
2848     //top
2849     for(y=0; y<start_y; y++){
2850         for(x=start_x; x<end_x; x++){
2851             buf[x + y*linesize]= buf[x + start_y*linesize];
2852         }
2853     }
2854
2855     //bottom
2856     for(y=end_y; y<block_h; y++){
2857         for(x=start_x; x<end_x; x++){
2858             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2859         }
2860     }
2861
2862     for(y=0; y<block_h; y++){
2863        //left
2864         for(x=0; x<start_x; x++){
2865             buf[x + y*linesize]= buf[start_x + y*linesize];
2866         }
2867
2868        //right
2869         for(x=end_x; x<block_w; x++){
2870             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2871         }
2872     }
2873 }
2874
2875 static inline int hpel_motion(MpegEncContext *s,
2876                                   uint8_t *dest, uint8_t *src,
2877                                   int field_based, int field_select,
2878                                   int src_x, int src_y,
2879                                   int width, int height, int stride,
2880                                   int h_edge_pos, int v_edge_pos,
2881                                   int w, int h, op_pixels_func *pix_op,
2882                                   int motion_x, int motion_y)
2883 {
2884     int dxy;
2885     int emu=0;
2886
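    /* dxy picks one of the four half-pel interpolators:
     * bit 0 = horizontal half-pel offset, bit 1 = vertical half-pel offset. */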
2887     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2888     src_x += motion_x >> 1;
2889     src_y += motion_y >> 1;
2890
2891     /* WARNING: do not forget half pels */
2892     src_x = av_clip(src_x, -16, width); //FIXME unneeded for emu?
2893     if (src_x == width)
2894         dxy &= ~1;
2895     src_y = av_clip(src_y, -16, height);
2896     if (src_y == height)
2897         dxy &= ~2;
2898     src += src_y * stride + src_x;
2899
2900     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2901         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2902            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2903             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2904                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2905             src= s->edge_emu_buffer;
2906             emu=1;
2907         }
2908     }
2909     if(field_select)
2910         src += s->linesize;
2911     pix_op[dxy](dest, src, stride, h);
2912     return emu;
2913 }
2914
2915 static inline int hpel_motion_lowres(MpegEncContext *s,
2916                                   uint8_t *dest, uint8_t *src,
2917                                   int field_based, int field_select,
2918                                   int src_x, int src_y,
2919                                   int width, int height, int stride,
2920                                   int h_edge_pos, int v_edge_pos,
2921                                   int w, int h, h264_chroma_mc_func *pix_op,
2922                                   int motion_x, int motion_y)
2923 {
2924     const int lowres= s->avctx->lowres;
2925     const int s_mask= (2<<lowres)-1;
2926     int emu=0;
2927     int sx, sy;
2928
2929     if(s->quarter_sample){
2930         motion_x/=2;
2931         motion_y/=2;
2932     }
2933
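    /* Split the vector into an integer position at the reduced resolution
     * and a fractional part sx/sy; the fraction is later rescaled to
     * eighths and handed to the bilinear h264 chroma MC routine, which
     * interpolates at arbitrary 1/8 offsets. */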
2934     sx= motion_x & s_mask;
2935     sy= motion_y & s_mask;
2936     src_x += motion_x >> (lowres+1);
2937     src_y += motion_y >> (lowres+1);
2938
2939     src += src_y * stride + src_x;
2940
2941     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2942        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2943         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2944                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2945         src= s->edge_emu_buffer;
2946         emu=1;
2947     }
2948
2949     sx <<= 2 - lowres;
2950     sy <<= 2 - lowres;
2951     if(field_select)
2952         src += s->linesize;
2953     pix_op[lowres](dest, src, stride, h, sx, sy);
2954     return emu;
2955 }
2956
2957 /* apply one mpeg motion vector to the three components */
2958 static av_always_inline void mpeg_motion(MpegEncContext *s,
2959                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2960                                int field_based, int bottom_field, int field_select,
2961                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2962                                int motion_x, int motion_y, int h)
2963 {
2964     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2965     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2966
2967 #if 0
2968 if(s->quarter_sample)
2969 {
2970     motion_x>>=1;
2971     motion_y>>=1;
2972 }
2973 #endif
2974
2975     v_edge_pos = s->v_edge_pos >> field_based;
2976     linesize   = s->current_picture.linesize[0] << field_based;
2977     uvlinesize = s->current_picture.linesize[1] << field_based;
2978
2979     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2980     src_x = s->mb_x* 16               + (motion_x >> 1);
2981     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2982
2983     if (s->out_format == FMT_H263) {
2984         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2985             mx = (motion_x>>1)|(motion_x&1);
2986             my = motion_y >>1;
2987             uvdxy = ((my & 1) << 1) | (mx & 1);
2988             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2989             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2990         }else{
2991             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2992             uvsrc_x = src_x>>1;
2993             uvsrc_y = src_y>>1;
2994         }
2995     }else if(s->out_format == FMT_H261){//even chroma MVs are full-pel in H.261
2996         mx = motion_x / 4;
2997         my = motion_y / 4;
2998         uvdxy = 0;
2999         uvsrc_x = s->mb_x*8 + mx;
3000         uvsrc_y = s->mb_y*8 + my;
3001     } else {
3002         if(s->chroma_y_shift){
3003             mx = motion_x / 2;
3004             my = motion_y / 2;
3005             uvdxy = ((my & 1) << 1) | (mx & 1);
3006             uvsrc_x = s->mb_x* 8               + (mx >> 1);
3007             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
3008         } else {
3009             if(s->chroma_x_shift){
3010             //Chroma422
3011                 mx = motion_x / 2;
3012                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
3013                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
3014                 uvsrc_y = src_y;
3015             } else {
3016             //Chroma444
3017                 uvdxy = dxy;
3018                 uvsrc_x = src_x;
3019                 uvsrc_y = src_y;
3020             }
3021         }
3022     }
3023
3024     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3025     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3026     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3027
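    /* For MPEG-1/2 a motion vector reaching outside the picture is a
     * bitstream error, so the block is skipped with a debug message;
     * other codecs pad the reference via ff_emulated_edge_mc() instead. */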
3028     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
3029        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
3030             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
3031                s->codec_id == CODEC_ID_MPEG1VIDEO){
3032                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
3033                 return ;
3034             }
3035             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3036                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3037             ptr_y = s->edge_emu_buffer;
3038             if(!(s->flags&CODEC_FLAG_GRAY)){
3039                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3040                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3041                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3042                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3043                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3044                 ptr_cb= uvbuf;
3045                 ptr_cr= uvbuf+16;
3046             }
3047     }
3048
3049     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3050         dest_y += s->linesize;
3051         dest_cb+= s->uvlinesize;
3052         dest_cr+= s->uvlinesize;
3053     }
3054
3055     if(field_select){
3056         ptr_y += s->linesize;
3057         ptr_cb+= s->uvlinesize;
3058         ptr_cr+= s->uvlinesize;
3059     }
3060
3061     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
3062
3063     if(!(s->flags&CODEC_FLAG_GRAY)){
3064         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
3065         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
3066     }
3067     if((ENABLE_H261_ENCODER || ENABLE_H261_DECODER) && s->out_format == FMT_H261){
3068         ff_h261_loop_filter(s);
3069     }
3070 }
3071
3072 /* apply one mpeg motion vector to the three components */
3073 static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
3074                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3075                                int field_based, int bottom_field, int field_select,
3076                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
3077                                int motion_x, int motion_y, int h)
3078 {
3079     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3080     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
3081     const int lowres= s->avctx->lowres;
3082     const int block_s= 8>>lowres;
3083     const int s_mask= (2<<lowres)-1;
3084     const int h_edge_pos = s->h_edge_pos >> lowres;
3085     const int v_edge_pos = s->v_edge_pos >> lowres;
3086     linesize   = s->current_picture.linesize[0] << field_based;
3087     uvlinesize = s->current_picture.linesize[1] << field_based;
3088
3089     if(s->quarter_sample){ //FIXME obviously not perfect but qpel won't work in lowres anyway
3090         motion_x/=2;
3091         motion_y/=2;
3092     }
3093
3094     if(field_based){
3095         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
3096     }
3097
3098     sx= motion_x & s_mask;
3099     sy= motion_y & s_mask;
3100     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
3101     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
3102
3103     if (s->out_format == FMT_H263) {
3104         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
3105         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
3106         uvsrc_x = src_x>>1;
3107         uvsrc_y = src_y>>1;
3108     }else if(s->out_format == FMT_H261){//even chroma MVs are full-pel in H.261
3109         mx = motion_x / 4;
3110         my = motion_y / 4;
3111         uvsx = (2*mx) & s_mask;
3112         uvsy = (2*my) & s_mask;
3113         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
3114         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
3115     } else {
3116         mx = motion_x / 2;
3117         my = motion_y / 2;
3118         uvsx = mx & s_mask;
3119         uvsy = my & s_mask;
3120         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
3121         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
3122     }
3123
3124     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3125     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3126     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3127
3128     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
3129        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
3130             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3131                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
3132             ptr_y = s->edge_emu_buffer;
3133             if(!(s->flags&CODEC_FLAG_GRAY)){
3134                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3135                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3136                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3137                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3138                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3139                 ptr_cb= uvbuf;
3140                 ptr_cr= uvbuf+16;
3141             }
3142     }
3143
3144     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3145         dest_y += s->linesize;
3146         dest_cb+= s->uvlinesize;
3147         dest_cr+= s->uvlinesize;
3148     }
3149
3150     if(field_select){
3151         ptr_y += s->linesize;
3152         ptr_cb+= s->uvlinesize;
3153         ptr_cr+= s->uvlinesize;
3154     }
3155
3156     sx <<= 2 - lowres;
3157     sy <<= 2 - lowres;
3158     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
3159
3160     if(!(s->flags&CODEC_FLAG_GRAY)){
3161         uvsx <<= 2 - lowres;
3162         uvsy <<= 2 - lowres;
3163         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3164         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3165     }
3166     //FIXME h261 lowres loop filter
3167 }
3168
3169 //FIXME move to dsputil, avg variant, 16x16 version
3170 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
3171     int x;
3172     uint8_t * const top   = src[1];
3173     uint8_t * const left  = src[2];
3174     uint8_t * const mid   = src[0];
3175     uint8_t * const right = src[3];
3176     uint8_t * const bottom= src[4];
3177 #define OBMC_FILTER(x, t, l, m, r, b)\
3178     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
3179 #define OBMC_FILTER4(x, t, l, m, r, b)\
3180     OBMC_FILTER(x         , t, l, m, r, b);\
3181     OBMC_FILTER(x+1       , t, l, m, r, b);\
3182     OBMC_FILTER(x  +stride, t, l, m, r, b);\
3183     OBMC_FILTER(x+1+stride, t, l, m, r, b);
3184
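    /* The (t,l,m,r,b) weights always sum to 8, so every output pixel is a
     * weighted average of the predictions made with the mid, top, left,
     * right and bottom vectors, rounded with +4 and >>3; e.g.
     * OBMC_FILTER(x, 2, 2, 4, 0, 0) blends 2/8 top + 2/8 left + 4/8 mid. */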
3185     x=0;
3186     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
3187     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
3188     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
3189     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
3190     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
3191     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
3192     x+= stride;
3193     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
3194     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
3195     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
3196     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
3197     x+= stride;
3198     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
3199     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
3200     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
3201     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
3202     x+= 2*stride;
3203     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
3204     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
3205     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
3206     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
3207     x+= 2*stride;
3208     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
3209     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
3210     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
3211     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
3212     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
3213     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
3214     x+= stride;
3215     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
3216     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
3217     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
3218     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
3219 }
3220
3221 /* obmc for 1 8x8 luma block */
3222 static inline void obmc_motion(MpegEncContext *s,
3223                                uint8_t *dest, uint8_t *src,
3224                                int src_x, int src_y,
3225                                op_pixels_func *pix_op,
3226                                int16_t mv[5][2]/* mid top left right bottom*/)
3227 #define MID    0
3228 {
3229     int i;
3230     uint8_t *ptr[5];
3231
3232     assert(s->quarter_sample==0);
3233
3234     for(i=0; i<5; i++){
3235         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3236             ptr[i]= ptr[MID];
3237         }else{
3238             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3239             hpel_motion(s, ptr[i], src, 0, 0,
3240                         src_x, src_y,
3241                         s->width, s->height, s->linesize,
3242                         s->h_edge_pos, s->v_edge_pos,
3243                         8, 8, pix_op,
3244                         mv[i][0], mv[i][1]);
3245         }
3246     }
3247
3248     put_obmc(dest, ptr, s->linesize);
3249 }
3250
3251 static inline void qpel_motion(MpegEncContext *s,
3252                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3253                                int field_based, int bottom_field, int field_select,
3254                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3255                                qpel_mc_func (*qpix_op)[16],
3256                                int motion_x, int motion_y, int h)
3257 {
3258     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3259     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3260
3261     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3262     src_x = s->mb_x *  16                 + (motion_x >> 2);
3263     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3264
3265     v_edge_pos = s->v_edge_pos >> field_based;
3266     linesize = s->linesize << field_based;
3267     uvlinesize = s->uvlinesize << field_based;
3268
3269     if(field_based){
3270         mx= motion_x/2;
3271         my= motion_y>>1;
3272     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3273         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3274         mx= (motion_x>>1) + rtab[motion_x&7];
3275         my= (motion_y>>1) + rtab[motion_y&7];
3276     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3277         mx= (motion_x>>1)|(motion_x&1);
3278         my= (motion_y>>1)|(motion_y&1);
3279     }else{
3280         mx= motion_x/2;
3281         my= motion_y/2;
3282     }
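    /* (v>>1)|(v&1) halves the vector but keeps any odd remainder on the
     * half-sample position instead of rounding it away. */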
3283     mx= (mx>>1)|(mx&1);
3284     my= (my>>1)|(my&1);
3285
3286     uvdxy= (mx&1) | ((my&1)<<1);
3287     mx>>=1;
3288     my>>=1;
3289
3290     uvsrc_x = s->mb_x *  8                 + mx;
3291     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3292
3293     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3294     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3295     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3296
3297     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
3298        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3299         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3300                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3301         ptr_y= s->edge_emu_buffer;
3302         if(!(s->flags&CODEC_FLAG_GRAY)){
3303             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3304             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
3305                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3306             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
3307                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3308             ptr_cb= uvbuf;
3309             ptr_cr= uvbuf + 16;
3310         }
3311     }
3312
3313     if(!field_based)
3314         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3315     else{
3316         if(bottom_field){
3317             dest_y += s->linesize;
3318             dest_cb+= s->uvlinesize;
3319             dest_cr+= s->uvlinesize;
3320         }
3321
3322         if(field_select){
3323             ptr_y  += s->linesize;
3324             ptr_cb += s->uvlinesize;
3325             ptr_cr += s->uvlinesize;
3326         }
3327         //damn interlaced mode
3328         //FIXME boundary mirroring is not exactly correct here
3329         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3330         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3331     }
3332     if(!(s->flags&CODEC_FLAG_GRAY)){
3333         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3334         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3335     }
3336 }
3337
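/* As used by chroma_4mv_motion() below, x is the sum of the four half-pel
 * luma block vectors; the result is the chroma vector in half-pel units,
 * roughly x/8, with the rounding of the low four bits taken from
 * h263_chroma_roundtab, e.g. x=13 -> ((13>>3)&~1) + roundtab[13] = 1. */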
3338 inline int ff_h263_round_chroma(int x){
3339     if (x >= 0)
3340         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3341     else {
3342         x = -x;
3343         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3344     }
3345 }
3346
3347 /**
3348  * h263 chroma 4mv motion compensation.
3349  */
3350 static inline void chroma_4mv_motion(MpegEncContext *s,
3351                                      uint8_t *dest_cb, uint8_t *dest_cr,
3352                                      uint8_t **ref_picture,
3353                                      op_pixels_func *pix_op,
3354                                      int mx, int my){
3355     int dxy, emu=0, src_x, src_y, offset;
3356     uint8_t *ptr;
3357
3358     /* In case of 8X8, we construct a single chroma motion vector
3359        with a special rounding */
3360     mx= ff_h263_round_chroma(mx);
3361     my= ff_h263_round_chroma(my);
3362
3363     dxy = ((my & 1) << 1) | (mx & 1);
3364     mx >>= 1;
3365     my >>= 1;
3366
3367     src_x = s->mb_x * 8 + mx;
3368     src_y = s->mb_y * 8 + my;
3369     src_x = av_clip(src_x, -8, s->width/2);
3370     if (src_x == s->width/2)
3371         dxy &= ~1;
3372     src_y = av_clip(src_y, -8, s->height/2);
3373     if (src_y == s->height/2)
3374         dxy &= ~2;
3375
3376     offset = (src_y * (s->uvlinesize)) + src_x;
3377     ptr = ref_picture[1] + offset;
3378     if(s->flags&CODEC_FLAG_EMU_EDGE){
3379         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3380            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3381             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3382             ptr= s->edge_emu_buffer;
3383             emu=1;
3384         }
3385     }
3386     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3387
3388     ptr = ref_picture[2] + offset;
3389     if(emu){
3390         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3391         ptr= s->edge_emu_buffer;
3392     }
3393     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3394 }
3395
3396 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3397                                      uint8_t *dest_cb, uint8_t *dest_cr,
3398                                      uint8_t **ref_picture,
3399                                      h264_chroma_mc_func *pix_op,
3400                                      int mx, int my){
3401     const int lowres= s->avctx->lowres;
3402     const int block_s= 8>>lowres;
3403     const int s_mask= (2<<lowres)-1;
3404     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3405     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3406     int emu=0, src_x, src_y, offset, sx, sy;
3407     uint8_t *ptr;
3408
3409     if(s->quarter_sample){
3410         mx/=2;
3411         my/=2;
3412     }
3413
3414     /* In case of 8X8, we construct a single chroma motion vector
3415        with a special rounding */
3416     mx= ff_h263_round_chroma(mx);
3417     my= ff_h263_round_chroma(my);
3418
3419     sx= mx & s_mask;
3420     sy= my & s_mask;
3421     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3422     src_y = s->mb_y*block_s + (my >> (lowres+1));
3423
3424     offset = src_y * s->uvlinesize + src_x;
3425     ptr = ref_picture[1] + offset;
3426     if(s->flags&CODEC_FLAG_EMU_EDGE){
3427         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3428            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3429             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3430             ptr= s->edge_emu_buffer;
3431             emu=1;
3432         }
3433     }
3434     sx <<= 2 - lowres;
3435     sy <<= 2 - lowres;
3436     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3437
3438     ptr = ref_picture[2] + offset;
3439     if(emu){
3440         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3441         ptr= s->edge_emu_buffer;
3442     }
3443     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3444 }
3445
3446 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3447     /* fetch pixels for estimated mv 4 macroblocks ahead
3448      * optimized for 64-byte cache lines */
3449     const int shift = s->quarter_sample ? 2 : 1;
3450     const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
3451     const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
3452     int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3453     s->dsp.prefetch(pix[0]+off, s->linesize, 4);
3454     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3455     s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
3456 }
3457
3458 /**
3459  * motion compensation of a single macroblock
3460  * @param s context
3461  * @param dest_y luma destination pointer
3462  * @param dest_cb chroma cb/u destination pointer
3463  * @param dest_cr chroma cr/v destination pointer
3464  * @param dir direction (0->forward, 1->backward)
3465  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3466  * @param pix_op halfpel motion compensation function (average or put normally)
3467  * @param qpix_op qpel motion compensation function (average or put normally)
3468  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3469  */
3470 static inline void MPV_motion(MpegEncContext *s,
3471                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3472                               int dir, uint8_t **ref_picture,
3473                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3474 {
3475     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3476     int mb_x, mb_y, i;
3477     uint8_t *ptr, *dest;
3478
3479     mb_x = s->mb_x;
3480     mb_y = s->mb_y;
3481
3482     prefetch_motion(s, ref_picture, dir);
3483
3484     if(s->obmc && s->pict_type != B_TYPE){
3485         int16_t mv_cache[4][4][2];
3486         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3487         const int mot_stride= s->b8_stride;
3488         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3489
3490         assert(!s->mb_skipped);
3491
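        /* Build a 4x4 MV cache with a one-entry border: rows 1-2 hold the
         * four 8x8 vectors of the current MB, row 0 and columns 0/3 hold
         * the top/left/right neighbours (falling back to the centre
         * vectors at picture edges or intra neighbours), and row 3 just
         * repeats row 2 because the MB below is not available yet. */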
3492         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3493         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3494         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3495
3496         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3497             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3498         }else{
3499             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3500         }
3501
3502         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3503             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3504             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3505         }else{
3506             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3507             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3508         }
3509
3510         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3511             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3512             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3513         }else{
3514             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3515             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3516         }
3517
3518         mx = 0;
3519         my = 0;
3520         for(i=0;i<4;i++) {
3521             const int x= (i&1)+1;
3522             const int y= (i>>1)+1;
3523             int16_t mv[5][2]= {
3524                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3525                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3526                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3527                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3528                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3529             //FIXME cleanup
3530             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3531                         ref_picture[0],
3532                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3533                         pix_op[1],
3534                         mv);
3535
3536             mx += mv[0][0];
3537             my += mv[0][1];
3538         }
3539         if(!(s->flags&CODEC_FLAG_GRAY))
3540             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3541
3542         return;
3543     }
3544
3545     switch(s->mv_type) {
3546     case MV_TYPE_16X16:
3547         if(s->mcsel){
3548             if(s->real_sprite_warping_points==1){
3549                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3550                             ref_picture);
3551             }else{
3552                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3553                             ref_picture);
3554             }
3555         }else if(s->quarter_sample){
3556             qpel_motion(s, dest_y, dest_cb, dest_cr,
3557                         0, 0, 0,
3558                         ref_picture, pix_op, qpix_op,
3559                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3560         }else if(ENABLE_WMV2 && s->mspel){
3561             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3562                         ref_picture, pix_op,
3563                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3564         }else
3565         {
3566             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3567                         0, 0, 0,
3568                         ref_picture, pix_op,
3569                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3570         }
3571         break;
3572     case MV_TYPE_8X8:
3573         mx = 0;
3574         my = 0;
3575         if(s->quarter_sample){
3576             for(i=0;i<4;i++) {
3577                 motion_x = s->mv[dir][i][0];
3578                 motion_y = s->mv[dir][i][1];
3579
3580                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3581                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3582                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3583
3584                 /* WARNING: do not forget half pels */
3585                 src_x = av_clip(src_x, -16, s->width);
3586                 if (src_x == s->width)
3587                     dxy &= ~3;
3588                 src_y = av_clip(src_y, -16, s->height);
3589                 if (src_y == s->height)
3590                     dxy &= ~12;
3591
3592                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3593                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3594                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3595                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3596                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3597                         ptr= s->edge_emu_buffer;
3598                     }
3599                 }
3600                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3601                 qpix_op[1][dxy](dest, ptr, s->linesize);
3602
3603                 mx += s->mv[dir][i][0]/2;
3604                 my += s->mv[dir][i][1]/2;
3605             }
3606         }else{
3607             for(i=0;i<4;i++) {
3608                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3609                             ref_picture[0], 0, 0,
3610                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3611                             s->width, s->height, s->linesize,
3612                             s->h_edge_pos, s->v_edge_pos,
3613                             8, 8, pix_op[1],
3614                             s->mv[dir][i][0], s->mv[dir][i][1]);
3615
3616                 mx += s->mv[dir][i][0];
3617                 my += s->mv[dir][i][1];
3618             }
3619         }
3620
3621         if(!(s->flags&CODEC_FLAG_GRAY))
3622             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3623         break;
3624     case MV_TYPE_FIELD:
3625         if (s->picture_structure == PICT_FRAME) {
3626             if(s->quarter_sample){
3627                 for(i=0; i<2; i++){
3628                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3629                                 1, i, s->field_select[dir][i],
3630                                 ref_picture, pix_op, qpix_op,
3631                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3632                 }
3633             }else{
3634                 /* top field */
3635                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3636                             1, 0, s->field_select[dir][0],
3637                             ref_picture, pix_op,
3638                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3639                 /* bottom field */
3640                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3641                             1, 1, s->field_select[dir][1],
3642                             ref_picture, pix_op,
3643                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3644             }
3645         } else {
3646             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3647                 ref_picture= s->current_picture_ptr->data;
3648             }
3649
3650             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3651                         0, 0, s->field_select[dir][0],
3652                         ref_picture, pix_op,
3653                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3654         }
3655         break;
3656     case MV_TYPE_16X8:
3657         for(i=0; i<2; i++){
3658             uint8_t ** ref2picture;
3659
3660             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3661                 ref2picture= ref_picture;
3662             }else{
3663                 ref2picture= s->current_picture_ptr->data;
3664             }
3665
3666             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3667                         0, 0, s->field_select[dir][i],
3668                         ref2picture, pix_op,
3669                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3670
3671             dest_y += 16*s->linesize;
3672             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3673             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3674         }
3675         break;
3676     case MV_TYPE_DMV:
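        /* Dual prime: each prediction is the average of a same-parity and
         * an opposite-parity reference, accumulated in place by doing the
         * first pass with put and the later ones with avg. */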
3677         if(s->picture_structure == PICT_FRAME){
3678             for(i=0; i<2; i++){
3679                 int j;
3680                 for(j=0; j<2; j++){
3681                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3682                                 1, j, j^i,
3683                                 ref_picture, pix_op,
3684                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3685                 }
3686                 pix_op = s->dsp.avg_pixels_tab;
3687             }
3688         }else{
3689             for(i=0; i<2; i++){
3690                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3691                             0, 0, s->picture_structure != i+1,
3692                             ref_picture, pix_op,
3693                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3694
3695                 // after put we make avg of the same block
3696                 pix_op=s->dsp.avg_pixels_tab;
3697
3698                 //opposite parity is always in the same frame if this is second field
3699                 if(!s->first_field){
3700                     ref_picture = s->current_picture_ptr->data;
3701                 }
3702             }
3703         }
3704     break;
3705     default: assert(0);
3706     }
3707 }
3708
3709 /**
3710  * motion compensation of a single macroblock
3711  * @param s context
3712  * @param dest_y luma destination pointer
3713  * @param dest_cb chroma cb/u destination pointer
3714  * @param dest_cr chroma cr/v destination pointer
3715  * @param dir direction (0->forward, 1->backward)
3716  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3717  * @param pix_op lowres motion compensation function (average or put normally)
3718  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3719  */
3720 static inline void MPV_motion_lowres(MpegEncContext *s,
3721                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3722                               int dir, uint8_t **ref_picture,
3723                               h264_chroma_mc_func *pix_op)
3724 {
3725     int mx, my;
3726     int mb_x, mb_y, i;
3727     const int lowres= s->avctx->lowres;
3728     const int block_s= 8>>lowres;
3729
3730     mb_x = s->mb_x;
3731     mb_y = s->mb_y;
3732
3733     switch(s->mv_type) {
3734     case MV_TYPE_16X16:
3735         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3736                     0, 0, 0,
3737                     ref_picture, pix_op,
3738                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3739         break;
3740     case MV_TYPE_8X8:
3741         mx = 0;
3742         my = 0;
3743             for(i=0;i<4;i++) {
3744                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3745                             ref_picture[0], 0, 0,
3746                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3747                             s->width, s->height, s->linesize,
3748                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3749                             block_s, block_s, pix_op,
3750                             s->mv[dir][i][0], s->mv[dir][i][1]);
3751
3752                 mx += s->mv[dir][i][0];
3753                 my += s->mv[dir][i][1];
3754             }
3755
3756         if(!(s->flags&CODEC_FLAG_GRAY))
3757             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3758         break;
3759     case MV_TYPE_FIELD:
3760         if (s->picture_structure == PICT_FRAME) {
3761             /* top field */
3762             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3763                         1, 0, s->field_select[dir][0],
3764                         ref_picture, pix_op,
3765                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3766             /* bottom field */
3767             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3768                         1, 1, s->field_select[dir][1],
3769                         ref_picture, pix_op,
3770                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3771         } else {
3772             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3773                 ref_picture= s->current_picture_ptr->data;
3774             }
3775
3776             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3777                         0, 0, s->field_select[dir][0],
3778                         ref_picture, pix_op,
3779                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3780         }
3781         break;
3782     case MV_TYPE_16X8:
3783         for(i=0; i<2; i++){
3784             uint8_t ** ref2picture;
3785
3786             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3787                 ref2picture= ref_picture;
3788             }else{
3789                 ref2picture= s->current_picture_ptr->data;
3790             }
3791
3792             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3793                         0, 0, s->field_select[dir][i],
3794                         ref2picture, pix_op,
3795                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3796
3797             dest_y += 2*block_s*s->linesize;
3798             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3799             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3800         }
3801         break;
3802     case MV_TYPE_DMV:
3803         if(s->picture_structure == PICT_FRAME){
3804             for(i=0; i<2; i++){
3805                 int j;
3806                 for(j=0; j<2; j++){
3807                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3808                                 1, j, j^i,
3809                                 ref_picture, pix_op,
3810                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3811                 }
3812                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3813             }
3814         }else{
3815             for(i=0; i<2; i++){
3816                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3817                             0, 0, s->picture_structure != i+1,
3818                             ref_picture, pix_op,
3819                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3820
3821                 // after put we make avg of the same block
3822                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3823
3824                 //opposite parity is always in the same frame if this is second field
3825                 if(!s->first_field){
3826                     ref_picture = s->current_picture_ptr->data;
3827                 }
3828             }
3829         }
3830     break;
3831     default: assert(0);
3832     }
3833 }
3834
3835 /* put block[] to dest[] */
3836 static inline void put_dct(MpegEncContext *s,
3837                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3838 {
3839     s->dct_unquantize_intra(s, block, i, qscale);
3840     s->dsp.idct_put (dest, line_size, block);
3841 }
3842
3843 /* add block[] to dest[] */
3844 static inline void add_dct(MpegEncContext *s,
3845                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3846 {
3847     if (s->block_last_index[i] >= 0) {
3848         s->dsp.idct_add (dest, line_size, block);
3849     }
3850 }
3851
3852 static inline void add_dequant_dct(MpegEncContext *s,
3853                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3854 {
3855     if (s->block_last_index[i] >= 0) {
3856         s->dct_unquantize_inter(s, block, i, qscale);
3857
3858         s->dsp.idct_add (dest, line_size, block);
3859     }
3860 }
3861
3862 /**
3863  * cleans dc, ac, coded_block for the current non-intra MB
3864  */
3865 void ff_clean_intra_table_entries(MpegEncContext *s)
3866 {
3867     int wrap = s->b8_stride;
3868     int xy = s->block_index[0];
3869
3870     s->dc_val[0][xy           ] =
3871     s->dc_val[0][xy + 1       ] =
3872     s->dc_val[0][xy     + wrap] =
3873     s->dc_val[0][xy + 1 + wrap] = 1024;
3874     /* ac pred */
3875     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3876     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3877     if (s->msmpeg4_version>=3) {
3878         s->coded_block[xy           ] =
3879         s->coded_block[xy + 1       ] =
3880         s->coded_block[xy     + wrap] =
3881         s->coded_block[xy + 1 + wrap] = 0;
3882     }
3883     /* chroma */
3884     wrap = s->mb_stride;
3885     xy = s->mb_x + s->mb_y * wrap;
3886     s->dc_val[1][xy] =
3887     s->dc_val[2][xy] = 1024;
3888     /* ac pred */
3889     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3890     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3891
3892     s->mbintra_table[xy]= 0;
3893 }
3894
3895 /* generic function called after a macroblock has been parsed by the
3896    decoder or after it has been encoded by the encoder.
3897
3898    Important variables used:
3899    s->mb_intra : true if intra macroblock
3900    s->mv_dir   : motion vector direction
3901    s->mv_type  : motion vector type
3902    s->mv       : motion vector
3903    s->interlaced_dct : true if interlaced dct used (mpeg2)
3904  */
3905 static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3906 {
3907     int mb_x, mb_y;
3908     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3909 #ifdef HAVE_XVMC
3910     if(s->avctx->xvmc_acceleration){
3911         XVMC_decode_mb(s);//xvmc uses pblocks
3912         return;
3913     }
3914 #endif
3915
3916     mb_x = s->mb_x;
3917     mb_y = s->mb_y;
3918
3919     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3920        /* save DCT coefficients */
3921        int i,j;
3922        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3923        for(i=0; i<6; i++)
3924            for(j=0; j<64; j++)
3925                *dct++ = block[i][s->dsp.idct_permutation[j]];
3926     }
3927
3928     s->current_picture.qscale_table[mb_xy]= s->qscale;
3929
3930     /* update DC predictors for P macroblocks */
3931     if (!s->mb_intra) {
3932         if (s->h263_pred || s->h263_aic) {
3933             if(s->mbintra_table[mb_xy])
3934                 ff_clean_intra_table_entries(s);
3935         } else {
3936             s->last_dc[0] =
3937             s->last_dc[1] =
3938             s->last_dc[2] = 128 << s->intra_dc_precision;
3939         }
3940     }
3941     else if (s->h263_pred || s->h263_aic)
3942         s->mbintra_table[mb_xy]=1;
3943
3944     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
3945         uint8_t *dest_y, *dest_cb, *dest_cr;
3946         int dct_linesize, dct_offset;
3947         op_pixels_func (*op_pix)[4];
3948         qpel_mc_func (*op_qpix)[16];
3949         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3950         const int uvlinesize= s->current_picture.linesize[1];
3951         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3952         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3953
3954         /* avoid copy if macroblock skipped in last frame too */
3955         /* skip only during decoding as we might trash the buffers during encoding a bit */
3956         if(!s->encoding){
3957             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3958             const int age= s->current_picture.age;
3959
3960             assert(age);
3961
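            /* mbskip_table counts how many frames in a row this MB has
             * been skipped; once the count reaches the age of the
             * reference picture, the pixels already in the buffer are up
             * to date and even the motion copy can be skipped. */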
3962             if (s->mb_skipped) {
3963                 s->mb_skipped= 0;
3964                 assert(s->pict_type!=I_TYPE);
3965
3966                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3967                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3968
3969                 /* if previous was skipped too, then nothing to do !  */
3970                 if (*mbskip_ptr >= age && s->current_picture.reference){
3971                     return;
3972                 }
3973             } else if(!s->current_picture.reference){
3974                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3975                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3976             } else{
3977                 *mbskip_ptr = 0; /* not skipped */
3978             }
3979         }
3980
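        /* With interlaced DCT the second row of 8x8 blocks belongs to the
         * other field, so it starts only one line further down but with a
         * doubled line stride; otherwise it starts block_size lines down. */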
3981         dct_linesize = linesize << s->interlaced_dct;
3982         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3983
3984         if(readable){
3985             dest_y=  s->dest[0];
3986             dest_cb= s->dest[1];
3987             dest_cr= s->dest[2];
3988         }else{
3989             dest_y = s->b_scratchpad;
3990             dest_cb= s->b_scratchpad+16*linesize;
3991             dest_cr= s->b_scratchpad+32*linesize;
3992         }
3993
3994         if (!s->mb_intra) {
3995             /* motion handling */
3996             /* decoding or more than one mb_type (MC was already done otherwise) */
3997             if(!s->encoding){
3998                 if(lowres_flag){
3999                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
4000
4001                     if (s->mv_dir & MV_DIR_FORWARD) {
4002                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
4003                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
4004                     }
4005                     if (s->mv_dir & MV_DIR_BACKWARD) {
4006                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
4007                     }
4008                 }else{
4009                     op_qpix= s->me.qpel_put;
4010                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
4011                         op_pix = s->dsp.put_pixels_tab;
4012                     }else{
4013                         op_pix = s->dsp.put_no_rnd_pixels_tab;
4014                     }
4015                     if (s->mv_dir & MV_DIR_FORWARD) {
4016                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4017                         op_pix = s->dsp.avg_pixels_tab;
4018                         op_qpix= s->me.qpel_avg;
4019                     }
4020                     if (s->mv_dir & MV_DIR_BACKWARD) {
4021                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4022                     }
4023                 }
4024             }
4025
4026             /* skip dequant / idct if we are really late ;) */
4027             if(s->hurry_up>1) goto skip_idct;
4028             if(s->avctx->skip_idct){
4029                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
4030                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
4031                    || s->avctx->skip_idct >= AVDISCARD_ALL)
4032                     goto skip_idct;
4033             }
4034
4035             /* add dct residue */
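            /* The codecs singled out in this condition dequantize the
             * coefficients already while parsing the bitstream, so they
             * only need idct_add below; the remaining decoders and the
             * encoder's local reconstruction dequantize here. */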
4036             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
4037                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
4038                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4039                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4040                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4041                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4042
4043                 if(!(s->flags&CODEC_FLAG_GRAY)){
4044                     if (s->chroma_y_shift){
4045                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4046                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4047                     }else{
4048                         dct_linesize >>= 1;
4049                         dct_offset >>=1;
4050                         add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4051                         add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4052                         add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4053                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4054                     }
4055                 }
4056             } else if(s->codec_id != CODEC_ID_WMV2){
4057                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
4058                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
4059                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
4060                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
4061
4062                 if(!(s->flags&CODEC_FLAG_GRAY)){
4063                     if(s->chroma_y_shift){//Chroma420
4064                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
4065                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
4066                     }else{
4067                         //chroma422
4068                         dct_linesize = uvlinesize << s->interlaced_dct;
4069                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4070
4071                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
4072                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
4073                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
4074                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
4075                         if(!s->chroma_x_shift){//Chroma444
4076                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
4077                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
4078                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
4079                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
4080                         }
4081                     }
4082                 }//fi gray
4083             }
4084             else if (ENABLE_WMV2) {
4085                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
4086             }
4087         } else {
4088             /* dct only in intra block */
4089             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
4090                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4091                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4092                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4093                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4094
4095                 if(!(s->flags&CODEC_FLAG_GRAY)){
4096                     if(s->chroma_y_shift){
4097                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4098                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4099                     }else{
4100                         dct_offset >>=1;
4101                         dct_linesize >>=1;
4102                         put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4103                         put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4104                         put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4105                         put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4106                     }
4107                 }
4108             }else{
4109                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
4110                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
4111                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
4112                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
4113
4114                 if(!(s->flags&CODEC_FLAG_GRAY)){
4115                     if(s->chroma_y_shift){
4116                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
4117                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
4118                     }else{
4119
4120                         dct_linesize = uvlinesize << s->interlaced_dct;
4121                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4122
4123                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
4124                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
4125                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
4126                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
4127                         if(!s->chroma_x_shift){//Chroma444
4128                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
4129                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
4130                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
4131                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
4132                         }
4133                     }
4134                 }//gray
4135             }
4136         }
4137 skip_idct:
4138         if(!readable){
4139             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
4140             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
4141             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
4142         }
4143     }
4144 }
4145
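/**
 * Reconstruct a macroblock from its DCT coefficient blocks (used both by the
 * decoders and by the encoder's local decode loop); dispatches to the normal
 * or lowres variant of MPV_decode_mb_internal().
 */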
4146 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4147     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4148     else                  MPV_decode_mb_internal(s, block, 0);
4149 }
4150
4151 #ifdef CONFIG_ENCODERS
4152
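/**
 * Eliminate an almost empty block: if all coefficients that could be zeroed
 * (optionally excluding the DC one) are 0 or +-1 and the run-length weighted
 * score of the +-1 coefficients is below the threshold, zero them out so the
 * block does not need to be coded. A negative threshold means the DC
 * coefficient may be zeroed as well.
 */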
4153 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4154 {
4155     static const char tab[64]=
4156         {3,2,2,1,1,1,1,1,
4157          1,1,1,1,1,1,1,1,
4158          1,1,1,1,1,1,1,1,
4159          0,0,0,0,0,0,0,0,
4160          0,0,0,0,0,0,0,0,
4161          0,0,0,0,0,0,0,0,
4162          0,0,0,0,0,0,0,0,
4163          0,0,0,0,0,0,0,0};
4164     int score=0;
4165     int run=0;
4166     int i;
4167     DCTELEM *block= s->block[n];
4168     const int last_index= s->block_last_index[n];
4169     int skip_dc;
4170
4171     if(threshold<0){
4172         skip_dc=0;
4173         threshold= -threshold;
4174     }else
4175         skip_dc=1;
4176
4177     /* are all the coefficients we could set to zero already zero? */
4178     if(last_index<=skip_dc - 1) return;
4179
4180     for(i=0; i<=last_index; i++){
4181         const int j = s->intra_scantable.permutated[i];
4182         const int level = FFABS(block[j]);
4183         if(level==1){
4184             if(skip_dc && i==0) continue;
4185             score+= tab[run];
4186             run=0;
4187         }else if(level>1){
4188             return;
4189         }else{
4190             run++;
4191         }
4192     }
4193     if(score >= threshold) return;
4194     for(i=skip_dc; i<=last_index; i++){
4195         const int j = s->intra_scantable.permutated[i];
4196         block[j]=0;
4197     }
4198     if(block[0]) s->block_last_index[n]= 0;
4199     else         s->block_last_index[n]= -1;
4200 }
4201
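/**
 * Clip the quantized coefficients of a block to [s->min_qcoeff, s->max_qcoeff]
 * (the intra DC coefficient is left untouched) and warn if clipping occurred
 * while simple macroblock decision is in use.
 */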
4202 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4203 {
4204     int i;
4205     const int maxlevel= s->max_qcoeff;
4206     const int minlevel= s->min_qcoeff;
4207     int overflow=0;
4208
4209     if(s->mb_intra){
4210         i=1; //skip clipping of intra dc
4211     }else
4212         i=0;
4213
4214     for(;i<=last_index; i++){
4215         const int j= s->intra_scantable.permutated[i];
4216         int level = block[j];
4217
4218         if     (level>maxlevel){
4219             level=maxlevel;
4220             overflow++;
4221         }else if(level<minlevel){
4222             level=minlevel;
4223             overflow++;
4224         }
4225
4226         block[j]= level;
4227     }
4228
4229     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4230         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4231 }
4232
4233 #endif //CONFIG_ENCODERS
4234
4235 /**
4236  * Call the user's draw_horiz_band callback, if one is set, for the band of h lines starting at line y.
4237  * @param h the normal height of the band; it is reduced automatically if needed for the last row
4238  */
4239 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4240     if (s->avctx->draw_horiz_band) {
4241         AVFrame *src;
4242         int offset[4];
4243
4244         if(s->picture_structure != PICT_FRAME){
4245             h <<= 1;
4246             y <<= 1;
4247             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4248         }
4249
4250         h= FFMIN(h, s->avctx->height - y);
4251
4252         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4253             src= (AVFrame*)s->current_picture_ptr;
4254         else if(s->last_picture_ptr)
4255             src= (AVFrame*)s->last_picture_ptr;
4256         else
4257             return;
4258
4259         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4260             offset[0]=
4261             offset[1]=
4262             offset[2]=
4263             offset[3]= 0;
4264         }else{
4265             offset[0]= y * s->linesize;
4266             offset[1]=
4267             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4268             offset[3]= 0;
4269         }
4270
4271         emms_c();
4272
4273         s->avctx->draw_horiz_band(s->avctx, src, offset,
4274                                   y, s->picture_structure, h);
4275     }
4276 }
4277
4278 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4279     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4280     const int uvlinesize= s->current_picture.linesize[1];
4281     const int mb_size= 4 - s->avctx->lowres;
4282
4283     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4284     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4285     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4286     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4287     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4288     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4289     //block_index is not used by mpeg2, so it is not affected by chroma_format
4290
4291     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4292     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4293     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4294
4295     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4296     {
4297         s->dest[0] += s->mb_y *   linesize << mb_size;
4298         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4299         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4300     }
4301 }
4302
4303 #ifdef CONFIG_ENCODERS
4304
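/**
 * Fill weight[] with a per-pixel activity measure of an 8x8 block for the
 * quantizer noise shaping pass: 36 times the standard deviation of each
 * pixel's 3x3 neighbourhood, clipped at the block borders.
 */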
4305 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4306     int x, y;
4307 //FIXME optimize
4308     for(y=0; y<8; y++){
4309         for(x=0; x<8; x++){
4310             int x2, y2;
4311             int sum=0;
4312             int sqr=0;
4313             int count=0;
4314
4315             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4316                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4317                     int v= ptr[x2 + y2*stride];
4318                     sum += v;
4319                     sqr += v*v;
4320                     count++;
4321                 }
4322             }
4323             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4324         }
4325     }
4326 }
4327
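/**
 * Encode one macroblock: fetch the source pixels (or compute the motion
 * compensated prediction error for inter blocks), forward DCT and quantize
 * each block, optionally apply coefficient elimination and noise shaping,
 * and emit the result with the codec specific macroblock writer.
 * @param mb_block_height chroma height of the macroblock in pixels (8 for
 *                        4:2:0, 16 for 4:2:2)
 * @param mb_block_count  number of 8x8 blocks per macroblock (6 or 8)
 */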
4328 static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4329 {
4330     int16_t weight[8][64];
4331     DCTELEM orig[8][64];
4332     const int mb_x= s->mb_x;
4333     const int mb_y= s->mb_y;
4334     int i;
4335     int skip_dct[8];
4336     int dct_offset   = s->linesize*8; //default for progressive frames
4337     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4338     int wrap_y, wrap_c;
4339
4340     for(i=0; i<mb_block_count; i++) skip_dct[i]=s->skipdct;
4341
4342     if(s->adaptive_quant){
4343         const int last_qp= s->qscale;
4344         const int mb_xy= mb_x + mb_y*s->mb_stride;
4345
4346         s->lambda= s->lambda_table[mb_xy];
4347         update_qscale(s);
4348
4349         if(!(s->flags&CODEC_FLAG_QP_RD)){
4350             s->qscale= s->current_picture_ptr->qscale_table[mb_xy];
4351             s->dquant= s->qscale - last_qp;
4352
4353             if(s->out_format==FMT_H263){
4354                 s->dquant= av_clip(s->dquant, -2, 2);
4355
4356                 if(s->codec_id==CODEC_ID_MPEG4){
4357                     if(!s->mb_intra){
4358                         if(s->pict_type == B_TYPE){
4359                             if(s->dquant&1 || s->mv_dir&MV_DIRECT)
4360                                 s->dquant= 0;
4361                         }
4362                         if(s->mv_type==MV_TYPE_8X8)
4363                             s->dquant=0;
4364                     }
4365                 }
4366             }
4367         }
4368         ff_set_qscale(s, last_qp + s->dquant);
4369     }else if(s->flags&CODEC_FLAG_QP_RD)
4370         ff_set_qscale(s, s->qscale + s->dquant);
4371
4372     wrap_y = s->linesize;
4373     wrap_c = s->uvlinesize;
4374     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4375     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4376     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4377
4378     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4379         uint8_t *ebuf= s->edge_emu_buffer + 32;
4380         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4381         ptr_y= ebuf;
4382         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4383         ptr_cb= ebuf+18*wrap_y;
4384         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4385         ptr_cr= ebuf+18*wrap_y+8;
4386     }
4387
4388     if (s->mb_intra) {
4389         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4390             int progressive_score, interlaced_score;
4391
4392             s->interlaced_dct=0;
4393             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4394                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4395
4396             if(progressive_score > 0){
4397                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4398                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4399                 if(progressive_score > interlaced_score){
4400                     s->interlaced_dct=1;
4401
4402                     dct_offset= wrap_y;
4403                     wrap_y<<=1;
4404                     if (s->chroma_format == CHROMA_422)
4405                         wrap_c<<=1;
4406                 }
4407             }
4408         }
4409
4410         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4411         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4412         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4413         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4414
4415         if(s->flags&CODEC_FLAG_GRAY){
4416             skip_dct[4]= 1;
4417             skip_dct[5]= 1;
4418         }else{
4419             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4420             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4421             if(!s->chroma_y_shift){ /* 422 */
4422                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4423                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4424             }
4425         }
4426     }else{
4427         op_pixels_func (*op_pix)[4];
4428         qpel_mc_func (*op_qpix)[16];
4429         uint8_t *dest_y, *dest_cb, *dest_cr;
4430
4431         dest_y  = s->dest[0];
4432         dest_cb = s->dest[1];
4433         dest_cr = s->dest[2];
4434
4435         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4436             op_pix = s->dsp.put_pixels_tab;
4437             op_qpix= s->dsp.put_qpel_pixels_tab;
4438         }else{
4439             op_pix = s->dsp.put_no_rnd_pixels_tab;
4440             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4441         }
4442
4443         if (s->mv_dir & MV_DIR_FORWARD) {
4444             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4445             op_pix = s->dsp.avg_pixels_tab;
4446             op_qpix= s->dsp.avg_qpel_pixels_tab;
4447         }
4448         if (s->mv_dir & MV_DIR_BACKWARD) {
4449             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4450         }
4451
4452         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4453             int progressive_score, interlaced_score;
4454
4455             s->interlaced_dct=0;
4456             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4457                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4458
4459             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4460
4461             if(progressive_score>0){
4462                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4463                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4464
4465                 if(progressive_score > interlaced_score){
4466                     s->interlaced_dct=1;
4467
4468                     dct_offset= wrap_y;
4469                     wrap_y<<=1;
4470                     if (s->chroma_format == CHROMA_422)
4471                         wrap_c<<=1;
4472                 }
4473             }
4474         }
4475
4476         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4477         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4478         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4479         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4480
4481         if(s->flags&CODEC_FLAG_GRAY){
4482             skip_dct[4]= 1;
4483             skip_dct[5]= 1;
4484         }else{
4485             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4486             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4487             if(!s->chroma_y_shift){ /* 422 */
4488                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4489                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4490             }
4491         }
4492         /* pre quantization */
4493         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4494             //FIXME optimize
4495             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4496             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4497             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4498             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4499             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4500             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4501             if(!s->chroma_y_shift){ /* 422 */
4502                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4503                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4504             }
4505         }
4506     }
4507
4508     if(s->avctx->quantizer_noise_shaping){
4509         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4510         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4511         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4512         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4513         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4514         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4515         if(!s->chroma_y_shift){ /* 422 */
4516             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4517             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4518         }
4519         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4520     }
4521
4522     /* DCT & quantize */
4523     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4524     {
4525         for(i=0;i<mb_block_count;i++) {
4526             if(!skip_dct[i]){
4527                 int overflow;
4528                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4529             // FIXME we could decide to change the quantizer instead of clipping
4530             // JS: I don't think that would be a good idea; it could lower quality instead
4531             //     of improving it. Only INTRADC clipping deserves changes in the quantizer.
4532                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4533             }else
4534                 s->block_last_index[i]= -1;
4535         }
4536         if(s->avctx->quantizer_noise_shaping){
4537             for(i=0;i<mb_block_count;i++) {
4538                 if(!skip_dct[i]){
4539                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4540                 }
4541             }
4542         }
4543
4544         if(s->luma_elim_threshold && !s->mb_intra)
4545             for(i=0; i<4; i++)
4546                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4547         if(s->chroma_elim_threshold && !s->mb_intra)
4548             for(i=4; i<mb_block_count; i++)
4549                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4550
4551         if(s->flags & CODEC_FLAG_CBP_RD){
4552             for(i=0;i<mb_block_count;i++) {
4553                 if(s->block_last_index[i] == -1)
4554                     s->coded_score[i]= INT_MAX/256;
4555             }
4556         }
4557     }
4558
4559     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4560         s->block_last_index[4]=
4561         s->block_last_index[5]= 0;
4562         s->block[4][0]=
4563         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4564     }
4565
4566     //FIXME: the non-C quantize code returns an incorrect block_last_index
4567     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4568         for(i=0; i<mb_block_count; i++){
4569             int j;
4570             if(s->block_last_index[i]>0){
4571                 for(j=63; j>0; j--){
4572                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4573                 }
4574                 s->block_last_index[i]= j;
4575             }
4576         }
4577     }
4578
4579     /* huffman encode */
4580     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
4581     case CODEC_ID_MPEG1VIDEO:
4582     case CODEC_ID_MPEG2VIDEO:
4583         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4584     case CODEC_ID_MPEG4:
4585         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4586     case CODEC_ID_MSMPEG4V2:
4587     case CODEC_ID_MSMPEG4V3:
4588     case CODEC_ID_WMV1:
4589         if (ENABLE_MSMPEG4_ENCODER)
4590             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
4591         break;
4592     case CODEC_ID_WMV2:
4593         if (ENABLE_WMV2_ENCODER)
4594             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
4595         break;
4596     case CODEC_ID_H261:
4597         if (ENABLE_H261_ENCODER)
4598             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
4599         break;
4600     case CODEC_ID_H263:
4601     case CODEC_ID_H263P:
4602     case CODEC_ID_FLV1:
4603     case CODEC_ID_RV10:
4604     case CODEC_ID_RV20:
4605         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4606     case CODEC_ID_MJPEG:
4607         if (ENABLE_MJPEG_ENCODER)
4608             ff_mjpeg_encode_mb(s, s->block);
4609         break;
4610     default:
4611         assert(0);
4612     }
4613 }
4614
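/** Encode one macroblock using the block count and chroma height matching the chroma format. */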
4615 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4616 {
4617     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4618     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4619 }
4620
4621 #endif //CONFIG_ENCODERS
4622
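/**
 * Release all internally or user allocated picture buffers and reset the
 * parse context and bitstream buffer; used to flush the codec, e.g. when
 * seeking.
 */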
4623 void ff_mpeg_flush(AVCodecContext *avctx){
4624     int i;
4625     MpegEncContext *s = avctx->priv_data;
4626
4627     if(s==NULL || s->picture==NULL)
4628         return;
4629
4630     for(i=0; i<MAX_PICTURE_COUNT; i++){
4631        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4632                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4633         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4634     }
4635     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4636
4637     s->mb_x= s->mb_y= 0;
4638
4639     s->parse_context.state= -1;
4640     s->parse_context.frame_start_found= 0;
4641     s->parse_context.overread= 0;
4642     s->parse_context.overread_index= 0;
4643     s->parse_context.index= 0;
4644     s->parse_context.last_index= 0;
4645     s->bitstream_buffer_size=0;
4646     s->pp_time=0;
4647 }
4648
4649 #ifdef CONFIG_ENCODERS
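/**
 * Append length bits from src to the PutBitContext. Short or unaligned
 * copies are written 16 bits at a time with put_bits(); longer byte-aligned
 * copies pad the output to a 32 bit boundary and memcpy() the bulk, with any
 * trailing bits written separately.
 */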
4650 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4651 {
4652     const uint16_t *srcw= (uint16_t*)src;
4653     int words= length>>4;
4654     int bits= length&15;
4655     int i;
4656
4657     if(length==0) return;
4658
4659     if(words < 16){
4660         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4661     }else if(put_bits_count(pb)&7){
4662         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4663     }else{
4664         for(i=0; put_bits_count(pb)&31; i++)
4665             put_bits(pb, 8, src[i]);
4666         flush_put_bits(pb);
4667         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4668         skip_put_bytes(pb, 2*words-i);
4669     }
4670
4671     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4672 }
4673
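/**
 * Copy the parts of the encoder state that encoding a macroblock may modify
 * (last MVs, DC predictors, bit statistics, qscale/dquant) from s to d; used
 * to back up and restore the context when trying candidate codings.
 */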
4674 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4675     int i;
4676
4677     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4678
4679     /* mpeg1 */
4680     d->mb_skip_run= s->mb_skip_run;
4681     for(i=0; i<3; i++)
4682         d->last_dc[i]= s->last_dc[i];
4683
4684     /* statistics */
4685     d->mv_bits= s->mv_bits;
4686     d->i_tex_bits= s->i_tex_bits;
4687     d->p_tex_bits= s->p_tex_bits;
4688     d->i_count= s->i_count;
4689     d->f_count= s->f_count;
4690     d->b_count= s->b_count;
4691     d->skip_count= s->skip_count;
4692     d->misc_bits= s->misc_bits;
4693     d->last_bits= 0;
4694
4695     d->mb_skipped= 0;
4696     d->qscale= s->qscale;
4697     d->dquant= s->dquant;
4698 }
4699
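/**
 * Copy the state resulting from encoding a macroblock (MVs, DC predictors,
 * statistics, PutBitContexts, block data and last indexes) from s to d; the
 * counterpart of copy_context_before_encode(), used to keep the best
 * candidate.
 */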
4700 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4701     int i;
4702
4703     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4704     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4705
4706     /* mpeg1 */
4707     d->mb_skip_run= s->mb_skip_run;
4708     for(i=0; i<3; i++)
4709         d->last_dc[i]= s->last_dc[i];
4710
4711     /* statistics */
4712     d->mv_bits= s->mv_bits;
4713     d->i_tex_bits= s->i_tex_bits;
4714     d->p_tex_bits= s->p_tex_bits;
4715     d->i_count= s->i_count;
4716     d->f_count= s->f_count;
4717     d->b_count= s->b_count;
4718     d->skip_count= s->skip_count;
4719     d->misc_bits= s->misc_bits;
4720
4721     d->mb_intra= s->mb_intra;
4722     d->mb_skipped= s->mb_skipped;
4723     d->mv_type= s->mv_type;
4724     d->mv_dir= s->mv_dir;
4725     d->pb= s->pb;
4726     if(s->data_partitioning){
4727         d->pb2= s->pb2;
4728         d->tex_pb= s->tex_pb;
4729     }
4730     d->block= s->block;
4731     for(i=0; i<8; i++)
4732         d->block_last_index[i]= s->block_last_index[i];
4733     d->interlaced_dct= s->interlaced_dct;
4734     d->qscale= s->qscale;
4735 }
4736
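/**
 * Encode the macroblock as the given candidate type into one of two scratch
 * bitstreams, score it by its size in bits (or, with FF_MB_DECISION_RD, by a
 * full rate-distortion cost using sse_mb()), and adopt it as the new best
 * choice if the score is below *dmin.
 */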
4737 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
4738                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4739                            int *dmin, int *next_block, int motion_x, int motion_y)
4740 {
4741     int score;
4742     uint8_t *dest_backup[3];
4743
4744     copy_context_before_encode(s, backup, type);
4745
4746     s->block= s->blocks[*next_block];
4747     s->pb= pb[*next_block];
4748     if(s->data_partitioning){
4749         s->pb2   = pb2   [*next_block];
4750         s->tex_pb= tex_pb[*next_block];
4751     }
4752
4753     if(*next_block){
4754         memcpy(dest_backup, s->dest, sizeof(s->dest));
4755         s->dest[0] = s->rd_scratchpad;
4756         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4757         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4758         assert(s->linesize >= 32); //FIXME
4759     }
4760
4761     encode_mb(s, motion_x, motion_y);
4762
4763     score= put_bits_count(&s->pb);
4764     if(s->data_partitioning){
4765         score+= put_bits_count(&s->pb2);
4766         score+= put_bits_count(&s->tex_pb);
4767     }
4768
4769     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4770         MPV_decode_mb(s, s->block);
4771
4772         score *= s->lambda2;
4773         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4774     }
4775
4776     if(*next_block){
4777         memcpy(s->dest, dest_backup, sizeof(s->dest));
4778     }
4779
4780     if(score<*dmin){
4781         *dmin= score;
4782         *next_block^=1;
4783
4784         copy_context_after_encode(best, s, type);
4785     }
4786 }
4787
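/**
 * Sum of squared errors between two w x h pixel blocks; the dsputil SSE
 * functions handle the common 16x16 and 8x8 cases, a plain C loop everything
 * else (e.g. partial blocks at the picture border).
 */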
4788 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4789     uint32_t *sq = ff_squareTbl + 256;
4790     int acc=0;
4791     int x,y;
4792
4793     if(w==16 && h==16)
4794         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4795     else if(w==8 && h==8)
4796         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4797
4798     for(y=0; y<h; y++){
4799         for(x=0; x<w; x++){
4800             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4801         }
4802     }
4803
4804     assert(acc>=0);
4805
4806     return acc;
4807 }
4808
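/**
 * Distortion of the current macroblock: SSE (or NSSE, if selected as mb_cmp)
 * between the source picture and the reconstructed pixels in s->dest[];
 * macroblocks overlapping the right or bottom picture border are compared
 * over the clipped area only.
 */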
4809 static int sse_mb(MpegEncContext *s){
4810     int w= 16;
4811     int h= 16;
4812
4813     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4814     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4815
4816     if(w==16 && h==16)
4817       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4818         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4819                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4820                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4821       }else{
4822         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4823                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4824                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4825       }
4826     else
4827         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4828                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4829                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4830 }
4831
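/**
 * Thread worker for the motion estimation pre-pass: scans the slice
 * bottom-up, right to left, with the pre-pass diamond size so that the main
 * pass has predictors available.
 */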
4832 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4833     MpegEncContext *s= arg;
4834
4835
4836     s->me.pre_pass=1;
4837     s->me.dia_size= s->avctx->pre_dia_size;
4838     s->first_slice_line=1;
4839     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4840         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4841             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4842         }
4843         s->first_slice_line=0;
4844     }
4845
4846     s->me.pre_pass=0;
4847
4848     return 0;
4849 }
4850
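/**
 * Thread worker for the main motion estimation pass: estimates P- or B-frame
 * motion vectors and macroblock types for every macroblock of the slice and
 * stores them in the context tables.
 */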
4851 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4852     MpegEncContext *s= arg;
4853
4854     ff_check_alignment();
4855
4856     s->me.dia_size= s->avctx->dia_size;
4857     s->first_slice_line=1;
4858     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4859         s->mb_x=0; //for block init below
4860         ff_init_block_index(s);
4861         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4862             s->block_index[0]+=2;
4863             s->block_index[1]+=2;
4864             s->block_index[2]+=2;
4865             s->block_index[3]+=2;
4866
4867             /* compute motion vector & mb_type and store in context */
4868             if(s->pict_type==B_TYPE)
4869                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4870             else
4871                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4872         }
4873         s->first_slice_line=0;
4874     }
4875     return 0;
4876 }
4877
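/**
 * Thread worker computing the variance and mean of every 16x16 luma block of
 * the source picture; the per-slice variance sum is accumulated in
 * s->me.mb_var_sum_temp.
 */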
4878 static int mb_var_thread(AVCodecContext *c, void *arg){
4879     MpegEncContext *s= arg;
4880     int mb_x, mb_y;
4881
4882     ff_check_alignment();
4883
4884     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4885         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4886             int xx = mb_x * 16;
4887             int yy = mb_y * 16;
4888             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4889             int varc;
4890             int sum = s->dsp.pix_sum(pix, s->linesize);
4891
4892             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4893
4894             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4895             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4896             s->me.mb_var_sum_temp    += varc;
4897         }
4898     }
4899     return 0;
4900 }
4901
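/**
 * Terminate the current slice: merge MPEG-4 data partitions if used, write
 * codec specific stuffing, and byte-align and flush the bitstream writer.
 */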
4902 static void write_slice_end(MpegEncContext *s){
4903     if(s->codec_id==CODEC_ID_MPEG4){
4904         if(s->partitioned_frame){
4905             ff_mpeg4_merge_partitions(s);
4906         }
4907
4908         ff_mpeg4_stuffing(&s->pb);
4909     }else if(ENABLE_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
4910         ff_mjpeg_encode_stuffing(&s->pb);
4911     }
4912
4913     align_put_bits(&s->pb);
4914     flush_put_bits(&s->pb);
4915
4916     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4917         s->misc_bits+= get_bits_diff(s);
4918 }
4919
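/**
 * Per-slice encoding worker. Iterates over the macroblocks of the slice,
 * emitting resync/GOB headers where required; when more than one candidate
 * macroblock type is possible (or QP_RD is enabled), each candidate is
 * encoded into scratch buffers via encode_mb_hq() and the cheapest one is
 * kept.
 */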
4920 static int encode_thread(AVCodecContext *c, void *arg){
4921     MpegEncContext *s= arg;
4922     int mb_x, mb_y, pdif = 0;
4923     int i, j;
4924     MpegEncContext best_s, backup_s;
4925     uint8_t bit_buf[2][MAX_MB_BYTES];
4926     uint8_t bit_buf2[2][MAX_MB_BYTES];
4927     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4928     PutBitContext pb[2], pb2[2], tex_pb[2];
4929 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4930
4931     ff_check_alignment();
4932
4933     for(i=0; i<2; i++){
4934         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4935         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4936         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4937     }
4938
4939     s->last_bits= put_bits_count(&s->pb);
4940     s->mv_bits=0;
4941     s->misc_bits=0;
4942     s->i_tex_bits=0;
4943     s->p_tex_bits=0;
4944     s->i_count=0;
4945     s->f_count=0;
4946     s->b_count=0;
4947     s->skip_count=0;
4948
4949     for(i=0; i<3; i++){
4950         /* init last dc values */
4951         /* note: quant matrix value (8) is implied here */
4952         s->last_dc[i] = 128 << s->intra_dc_precision;
4953
4954         s->current_picture.error[i] = 0;
4955     }
4956     s->mb_skip_run = 0;
4957     memset(s->last_mv, 0, sizeof(s->last_mv));
4958
4959     s->last_mv_dir = 0;
4960
4961     switch(s->codec_id){
4962     case CODEC_ID_H263:
4963     case CODEC_ID_H263P:
4964     case CODEC_ID_FLV1:
4965         s->gob_index = ff_h263_get_gob_height(s);
4966         break;
4967     case CODEC_ID_MPEG4:
4968         if(s->partitioned_frame)
4969             ff_mpeg4_init_partitions(s);
4970         break;
4971     }
4972
4973     s->resync_mb_x=0;
4974     s->resync_mb_y=0;
4975     s->first_slice_line = 1;
4976     s->ptr_lastgob = s->pb.buf;
4977     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4978 //    printf("row %d at %X\n", s->mb_y, (int)s);
4979         s->mb_x=0;
4980         s->mb_y= mb_y;
4981
4982         ff_set_qscale(s, s->qscale);
4983         ff_init_block_index(s);
4984
4985         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4986             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4987             int mb_type= s->mb_type[xy];
4988 //            int d;
4989             int dmin= INT_MAX;
4990             int dir;
4991
4992             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4993                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4994                 return -1;
4995             }
4996             if(s->data_partitioning){
4997                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4998                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4999                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
5000                     return -1;
5001                 }
5002             }
5003
5004             s->mb_x = mb_x;
5005             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
5006             ff_update_block_index(s);
5007
5008             if(ENABLE_H261_ENCODER && s->codec_id == CODEC_ID_H261){
5009                 ff_h261_reorder_mb_index(s);
5010                 xy= s->mb_y*s->mb_stride + s->mb_x;
5011                 mb_type= s->mb_type[xy];
5012             }
5013
5014             /* write gob / video packet header  */
5015             if(s->rtp_mode){
5016                 int current_packet_size, is_gob_start;
5017
5018                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
5019
5020                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
5021
5022                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
5023
5024                 switch(s->codec_id){
5025                 case CODEC_ID_H263:
5026                 case CODEC_ID_H263P:
5027                     if(!s->h263_slice_structured)
5028                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
5029                     break;
5030                 case CODEC_ID_MPEG2VIDEO:
5031                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
5032                 case CODEC_ID_MPEG1VIDEO:
5033                     if(s->mb_skip_run) is_gob_start=0;
5034                     break;
5035                 }
5036
5037                 if(is_gob_start){
5038                     if(s->start_mb_y != mb_y || mb_x!=0){
5039                         write_slice_end(s);
5040
5041                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
5042                             ff_mpeg4_init_partitions(s);
5043                         }
5044                     }
5045
5046                     assert((put_bits_count(&s->pb)&7) == 0);
5047                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
5048
5049                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
5050                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
5051                         int d= 100 / s->avctx->error_rate;
5052                         if(r % d == 0){
5053                             current_packet_size=0;
5054 #ifndef ALT_BITSTREAM_WRITER
5055                             s->pb.buf_ptr= s->ptr_lastgob;
5056 #endif
5057                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
5058                         }
5059                     }
5060
5061                     if (s->avctx->rtp_callback){
5062                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
5063                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
5064                     }
5065
5066                     switch(s->codec_id){
5067                     case CODEC_ID_MPEG4:
5068                         ff_mpeg4_encode_video_packet_header(s);
5069                         ff_mpeg4_clean_buffers(s);
5070                     break;
5071                     case CODEC_ID_MPEG1VIDEO:
5072                     case CODEC_ID_MPEG2VIDEO:
5073                         ff_mpeg1_encode_slice_header(s);
5074                         ff_mpeg1_clean_buffers(s);
5075                     break;
5076                     case CODEC_ID_H263:
5077                     case CODEC_ID_H263P:
5078                         h263_encode_gob_header(s, mb_y);
5079                     break;
5080                     }
5081
5082                     if(s->flags&CODEC_FLAG_PASS1){
5083                         int bits= put_bits_count(&s->pb);
5084                         s->misc_bits+= bits - s->last_bits;
5085                         s->last_bits= bits;
5086                     }
5087
5088                     s->ptr_lastgob += current_packet_size;
5089                     s->first_slice_line=1;
5090                     s->resync_mb_x=mb_x;
5091                     s->resync_mb_y=mb_y;
5092                 }
5093             }
5094
5095             if(  (s->resync_mb_x   == s->mb_x)
5096                && s->resync_mb_y+1 == s->mb_y){
5097                 s->first_slice_line=0;
5098             }
5099
5100             s->mb_skipped=0;
5101             s->dquant=0; //only for QP_RD
5102
5103             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5104                 int next_block=0;
5105                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5106
5107                 copy_context_before_encode(&backup_s, s, -1);
5108                 backup_s.pb= s->pb;
5109                 best_s.data_partitioning= s->data_partitioning;
5110                 best_s.partitioned_frame= s->partitioned_frame;
5111                 if(s->data_partitioning){
5112                     backup_s.pb2= s->pb2;
5113                     backup_s.tex_pb= s->tex_pb;
5114                 }
5115
5116                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5117                     s->mv_dir = MV_DIR_FORWARD;
5118                     s->mv_type = MV_TYPE_16X16;
5119                     s->mb_intra= 0;
5120                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5121                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5122                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5123                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5124                 }
5125                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5126                     s->mv_dir = MV_DIR_FORWARD;
5127                     s->mv_type = MV_TYPE_FIELD;
5128                     s->mb_intra= 0;
5129                     for(i=0; i<2; i++){
5130                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5131                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5132                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5133                     }
5134                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5135                                  &dmin, &next_block, 0, 0);
5136                 }
5137                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5138                     s->mv_dir = MV_DIR_FORWARD;
5139                     s->mv_type = MV_TYPE_16X16;
5140                     s->mb_intra= 0;
5141                     s->mv[0][0][0] = 0;
5142                     s->mv[0][0][1] = 0;
5143                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5144                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5145                 }
5146                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5147                     s->mv_dir = MV_DIR_FORWARD;
5148                     s->mv_type = MV_TYPE_8X8;
5149                     s->mb_intra= 0;
5150                     for(i=0; i<4; i++){
5151                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5152                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5153                     }
5154                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5155                                  &dmin, &next_block, 0, 0);
5156                 }
5157                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5158                     s->mv_dir = MV_DIR_FORWARD;
5159                     s->mv_type = MV_TYPE_16X16;
5160                     s->mb_intra= 0;
5161                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5162                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5163                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5164                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5165                 }
5166                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5167                     s->mv_dir = MV_DIR_BACKWARD;
5168                     s->mv_type = MV_TYPE_16X16;
5169                     s->mb_intra= 0;
5170                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5171                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5172                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5173                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5174                 }
5175                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5176                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5177                     s->mv_type = MV_TYPE_16X16;
5178                     s->mb_intra= 0;
5179                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5180                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5181                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5182                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5183                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5184                                  &dmin, &next_block, 0, 0);
5185                 }
5186                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5187                     s->mv_dir = MV_DIR_FORWARD;
5188                     s->mv_type = MV_TYPE_FIELD;
5189                     s->mb_intra= 0;
5190                     for(i=0; i<2; i++){
5191                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5192                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5193                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5194                     }
5195                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5196                                  &dmin, &next_block, 0, 0);
5197                 }
5198                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5199                     s->mv_dir = MV_DIR_BACKWARD;
5200                     s->mv_type = MV_TYPE_FIELD;
5201                     s->mb_intra= 0;
5202                     for(i=0; i<2; i++){
5203                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5204                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5205                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5206                     }
5207                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5208                                  &dmin, &next_block, 0, 0);
5209                 }
5210                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5211                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5212                     s->mv_type = MV_TYPE_FIELD;
5213                     s->mb_intra= 0;
5214                     for(dir=0; dir<2; dir++){
5215                         for(i=0; i<2; i++){
5216                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5217                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5218                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5219                         }
5220                     }
5221                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5222                                  &dmin, &next_block, 0, 0);
5223                 }
5224                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5225                     s->mv_dir = 0;
5226                     s->mv_type = MV_TYPE_16X16;
5227                     s->mb_intra= 1;
5228                     s->mv[0][0][0] = 0;
5229                     s->mv[0][0][1] = 0;
5230                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5231                                  &dmin, &next_block, 0, 0);
5232                     if(s->h263_pred || s->h263_aic){
5233                         if(best_s.mb_intra)
5234                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5235                         else
5236                             ff_clean_intra_table_entries(s); //old mode?
5237                     }
5238                 }
5239
5240                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
5241                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
5242                         const int last_qp= backup_s.qscale;
5243                         int qpi, qp, dc[6];
5244                         DCTELEM ac[6][16];
5245                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5246                         static const int dquant_tab[4]={-1,1,-2,2};
5247
5248                         assert(backup_s.dquant == 0);
5249
5250                         //FIXME intra
5251                         s->mv_dir= best_s.mv_dir;
5252                         s->mv_type = MV_TYPE_16X16;
5253                         s->mb_intra= best_s.mb_intra;
5254                         s->mv[0][0][0] = best_s.mv[0][0][0];
5255                         s->mv[0][0][1] = best_s.mv[0][0][1];
5256                         s->mv[1][0][0] = best_s.mv[1][0][0];
5257                         s->mv[1][0][1] = best_s.mv[1][0][1];
5258
5259                         qpi = s->pict_type == B_TYPE ? 2 : 0;
5260                         for(; qpi<4; qpi++){
5261                             int dquant= dquant_tab[qpi];
5262                             qp= last_qp + dquant;
5263                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5264                                 continue;
5265                             backup_s.dquant= dquant;
5266                             if(s->mb_intra && s->dc_val[0]){
5267                                 for(i=0; i<6; i++){
5268                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5269                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5270                                 }
5271                             }
5272
5273                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5274                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5275                             if(best_s.qscale != qp){
5276                                 if(s->mb_intra && s->dc_val[0]){
5277                                     for(i=0; i<6; i++){
5278                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5279                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5280                                     }
5281                                 }
5282                             }
5283                         }
5284                     }
5285                 }
5286                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5287                     int mx= s->b_direct_mv_table[xy][0];
5288                     int my= s->b_direct_mv_table[xy][1];
5289
5290                     backup_s.dquant = 0;
5291                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5292                     s->mb_intra= 0;
5293                     ff_mpeg4_set_direct_mv(s, mx, my);
5294                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5295                                  &dmin, &next_block, mx, my);
5296                 }
5297                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT0){
5298                     backup_s.dquant = 0;
5299                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5300                     s->mb_intra= 0;
5301                     ff_mpeg4_set_direct_mv(s, 0, 0);
5302                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5303                                  &dmin, &next_block, 0, 0);
5304                 }
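                /* skip-RD (CODEC_FLAG2_SKIP_RD): if the best inter candidate still
                   has coded coefficients, additionally try it with the residual
                   dropped (skipdct=1) and keep that version if it wins the RD
                   comparison. */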
5305                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
5306                     int coded=0;
5307                     for(i=0; i<6; i++)
5308                         coded |= s->block_last_index[i];
5309                     if(coded){
5310                         int mx,my;
5311                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
5312                         if(best_s.mv_dir & MV_DIRECT){
5313                             mx=my=0; //FIXME find the one we actually used
5314                             ff_mpeg4_set_direct_mv(s, mx, my);
5315                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
5316                             mx= s->mv[1][0][0];
5317                             my= s->mv[1][0][1];
5318                         }else{
5319                             mx= s->mv[0][0][0];
5320                             my= s->mv[0][0][1];
5321                         }
5322
5323                         s->mv_dir= best_s.mv_dir;
5324                         s->mv_type = best_s.mv_type;
5325                         s->mb_intra= 0;
5326 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
5327                         s->mv[0][0][1] = best_s.mv[0][0][1];
5328                         s->mv[1][0][0] = best_s.mv[1][0][0];
5329                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
5330                         backup_s.dquant= 0;
5331                         s->skipdct=1;
5332                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5333                                         &dmin, &next_block, mx, my);
5334                         s->skipdct=0;
5335                     }
5336                 }
5337
5338                 s->current_picture.qscale_table[xy]= best_s.qscale;
5339
5340                 copy_context_after_encode(s, &best_s, -1);
5341
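                /* the winning candidate's bits were written into the scratch buffer
                   bit_buf[next_block^1]; append them to the real output bitstream
                   kept in backup_s and switch s->pb (and, with data partitioning,
                   pb2/tex_pb) back to it. */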
5342                 pb_bits_count= put_bits_count(&s->pb);
5343                 flush_put_bits(&s->pb);
5344                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5345                 s->pb= backup_s.pb;
5346
5347                 if(s->data_partitioning){
5348                     pb2_bits_count= put_bits_count(&s->pb2);
5349                     flush_put_bits(&s->pb2);
5350                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5351                     s->pb2= backup_s.pb2;
5352
5353                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5354                     flush_put_bits(&s->tex_pb);
5355                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5356                     s->tex_pb= backup_s.tex_pb;
5357                 }
5358                 s->last_bits= put_bits_count(&s->pb);
5359
5360                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5361                     ff_h263_update_motion_val(s);
5362
5363                 if(next_block==0){ //FIXME 16 vs linesize16
5364                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5365                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5366                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5367                 }
5368
5369                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5370                     MPV_decode_mb(s, s->block);
5371             } else {
5372                 int motion_x = 0, motion_y = 0;
5373                 s->mv_type=MV_TYPE_16X16;
5374                 // only one MB-Type possible
5375
5376                 switch(mb_type){
5377                 case CANDIDATE_MB_TYPE_INTRA:
5378                     s->mv_dir = 0;
5379                     s->mb_intra= 1;
5380                     motion_x= s->mv[0][0][0] = 0;
5381                     motion_y= s->mv[0][0][1] = 0;
5382                     break;
5383                 case CANDIDATE_MB_TYPE_INTER:
5384                     s->mv_dir = MV_DIR_FORWARD;
5385                     s->mb_intra= 0;
5386                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5387                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5388                     break;
5389                 case CANDIDATE_MB_TYPE_INTER_I:
5390                     s->mv_dir = MV_DIR_FORWARD;
5391                     s->mv_type = MV_TYPE_FIELD;
5392                     s->mb_intra= 0;
5393                     for(i=0; i<2; i++){
5394                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5395                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5396                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5397                     }
5398                     break;
5399                 case CANDIDATE_MB_TYPE_INTER4V:
5400                     s->mv_dir = MV_DIR_FORWARD;
5401                     s->mv_type = MV_TYPE_8X8;
5402                     s->mb_intra= 0;
5403                     for(i=0; i<4; i++){
5404                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5405                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5406                     }
5407                     break;
5408                 case CANDIDATE_MB_TYPE_DIRECT:
5409                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5410                     s->mb_intra= 0;
5411                     motion_x=s->b_direct_mv_table[xy][0];
5412                     motion_y=s->b_direct_mv_table[xy][1];
5413                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5414                     break;
5415                 case CANDIDATE_MB_TYPE_DIRECT0:
5416                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5417                     s->mb_intra= 0;
5418                     ff_mpeg4_set_direct_mv(s, 0, 0);
5419                     break;
5420                 case CANDIDATE_MB_TYPE_BIDIR:
5421                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5422                     s->mb_intra= 0;
5423                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5424                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5425                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5426                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5427                     break;
5428                 case CANDIDATE_MB_TYPE_BACKWARD:
5429                     s->mv_dir = MV_DIR_BACKWARD;
5430                     s->mb_intra= 0;
5431                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5432                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5433                     break;
5434                 case CANDIDATE_MB_TYPE_FORWARD:
5435                     s->mv_dir = MV_DIR_FORWARD;
5436                     s->mb_intra= 0;
5437                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5438                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5439 //                    printf(" %d %d ", motion_x, motion_y);
5440                     break;
5441                 case CANDIDATE_MB_TYPE_FORWARD_I:
5442                     s->mv_dir = MV_DIR_FORWARD;
5443                     s->mv_type = MV_TYPE_FIELD;
5444                     s->mb_intra= 0;
5445                     for(i=0; i<2; i++){
5446                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5447                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5448                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5449                     }
5450                     break;
5451                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5452                     s->mv_dir = MV_DIR_BACKWARD;
5453                     s->mv_type = MV_TYPE_FIELD;
5454                     s->mb_intra= 0;
5455                     for(i=0; i<2; i++){
5456                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5457                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5458                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5459                     }
5460                     break;
5461                 case CANDIDATE_MB_TYPE_BIDIR_I:
5462                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5463                     s->mv_type = MV_TYPE_FIELD;
5464                     s->mb_intra= 0;
5465                     for(dir=0; dir<2; dir++){
5466                         for(i=0; i<2; i++){
5467                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5468                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5469                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5470                         }
5471                     }
5472                     break;
5473                 default:
5474                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5475                 }
5476
5477                 encode_mb(s, motion_x, motion_y);
5478
5479                 // RAL: Update last macroblock type
5480                 s->last_mv_dir = s->mv_dir;
5481
5482                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5483                     ff_h263_update_motion_val(s);
5484
5485                 MPV_decode_mb(s, s->block);
5486             }
5487
5488             /* clean the MV table in I/P/S frames for direct mode in B-frames */
5489             if(s->mb_intra /* && I,P,S_TYPE */){
5490                 s->p_mv_table[xy][0]=0;
5491                 s->p_mv_table[xy][1]=0;
5492             }
5493
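            /* accumulate the per-plane squared error between the source and the
               reconstructed macroblock (clipped at the picture border) so PSNR can
               be reported once the frame is finished. */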
5494             if(s->flags&CODEC_FLAG_PSNR){
5495                 int w= 16;
5496                 int h= 16;
5497
5498                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5499                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5500
5501                 s->current_picture.error[0] += sse(
5502                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5503                     s->dest[0], w, h, s->linesize);
5504                 s->current_picture.error[1] += sse(
5505                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5506                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5507                 s->current_picture.error[2] += sse(
5508                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5509                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5510             }
5511             if(s->loop_filter){
5512                 if(s->out_format == FMT_H263)
5513                     ff_h263_loop_filter(s);
5514             }
5515 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5516         }
5517     }
5518
5519     //not pretty, but the extension header must be written before flushing, so it has to go here
5520     if (ENABLE_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5521         msmpeg4_encode_ext_header(s);
5522
5523     write_slice_end(s);
5524
5525     /* Send the last GOB if RTP */
5526     if (s->avctx->rtp_callback) {
5527         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5528         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5529         /* Call the RTP callback to send the last GOB */
5530         emms_c();
5531         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5532     }
5533
5534     return 0;
5535 }
5536
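/* MERGE() adds a field of a per-thread context into the main context and clears
   it in the source, letting the slice-threaded encoder accumulate statistics
   after motion estimation and after encoding. */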
5537 #define MERGE(field) dst->field += src->field; src->field=0
5538 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5539     MERGE(me.scene_change_score);
5540     MERGE(me.mc_mb_var_sum_temp);
5541     MERGE(me.mb_var_sum_temp);
5542 }
5543
5544 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5545     int i;
5546
5547     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5548     MERGE(dct_count[1]);
5549     MERGE(mv_bits);
5550     MERGE(i_tex_bits);
5551     MERGE(p_tex_bits);
5552     MERGE(i_count);
5553     MERGE(f_count);
5554     MERGE(b_count);
5555     MERGE(skip_count);
5556     MERGE(misc_bits);
5557     MERGE(error_count);
5558     MERGE(padding_bug_score);
5559     MERGE(current_picture.error[0]);
5560     MERGE(current_picture.error[1]);
5561     MERGE(current_picture.error[2]);
5562
5563     if(dst->avctx->noise_reduction){
5564         for(i=0; i<64; i++){
5565             MERGE(dct_error_sum[0][i]);
5566             MERGE(dct_error_sum[1][i]);
5567         }
5568     }
5569
5570     assert(put_bits_count(&src->pb) % 8 ==0);
5571     assert(put_bits_count(&dst->pb) % 8 ==0);
5572     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5573     flush_put_bits(&dst->pb);
5574 }
5575
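/* Pick the picture-level quantizer: use a pending next_lambda if one is set,
   otherwise ask the rate control (unless the qscale is fixed).  With adaptive
   quantization the per-MB qscale tables are cleaned up for codecs whose syntax
   only allows small per-MB qscale changes, then update_qscale() derives qscale
   from lambda. */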
5576 static int estimate_qp(MpegEncContext *s, int dry_run){
5577     if (s->next_lambda){
5578         s->current_picture_ptr->quality=
5579         s->current_picture.quality = s->next_lambda;
5580         if(!dry_run) s->next_lambda= 0;
5581     } else if (!s->fixed_qscale) {
5582         s->current_picture_ptr->quality=
5583         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5584         if (s->current_picture.quality < 0)
5585             return -1;
5586     }
5587
5588     if(s->adaptive_quant){
5589         switch(s->codec_id){
5590         case CODEC_ID_MPEG4:
5591             ff_clean_mpeg4_qscales(s);
5592             break;
5593         case CODEC_ID_H263:
5594         case CODEC_ID_H263P:
5595         case CODEC_ID_FLV1:
5596             ff_clean_h263_qscales(s);
5597             break;
5598         }
5599
5600         s->lambda= s->lambda_table[0];
5601         //FIXME broken
5602     }else
5603         s->lambda= s->current_picture.quality;
5604 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5605     update_qscale(s);
5606     return 0;
5607 }
5608
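/* Top-level per-frame encoding pass: reset the ME statistics, decide the
   rounding mode, estimate the quantizer (two-pass or rate controlled), run
   motion estimation on all worker threads, promote the frame to an I-frame on a
   scene change, pick f_code/b_code and repair over-long motion vectors, write
   the picture header for the chosen output format, and finally run
   encode_thread() on every slice context and merge the results back. */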
5609 static int encode_picture(MpegEncContext *s, int picture_number)
5610 {
5611     int i;
5612     int bits;
5613
5614     s->picture_number = picture_number;
5615
5616     /* Reset the average MB variance */
5617     s->me.mb_var_sum_temp    =
5618     s->me.mc_mb_var_sum_temp = 0;
5619
5620     /* we need to initialize some time vars before we can encode b-frames */
5621     // RAL: Condition added for MPEG1VIDEO
5622     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5623         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5624
5625     s->me.scene_change_score=0;
5626
5627 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
5628
5629     if(s->pict_type==I_TYPE){
5630         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5631         else                        s->no_rounding=0;
5632     }else if(s->pict_type!=B_TYPE){
5633         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5634             s->no_rounding ^= 1;
5635     }
5636
5637     if(s->flags & CODEC_FLAG_PASS2){
5638         if (estimate_qp(s,1) < 0)
5639             return -1;
5640         ff_get_2pass_fcode(s);
5641     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
5642         if(s->pict_type==B_TYPE)
5643             s->lambda= s->last_lambda_for[s->pict_type];
5644         else
5645             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
5646         update_qscale(s);
5647     }
5648
5649     s->mb_intra=0; //for the rate distortion & bit compare functions
5650     for(i=1; i<s->avctx->thread_count; i++){
5651         ff_update_duplicate_context(s->thread_context[i], s);
5652     }
5653
5654     ff_init_me(s);
5655
5656     /* Estimate motion for every MB */
5657     if(s->pict_type != I_TYPE){
5658         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
5659         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
5660         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5661             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5662                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5663             }
5664         }
5665
5666         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5667     }else /* if(s->pict_type == I_TYPE) */{
5668         /* I-Frame */
5669         for(i=0; i<s->mb_stride*s->mb_height; i++)
5670             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5671
5672         if(!s->fixed_qscale){
5673             /* finding spatial complexity for I-frame rate control */
5674             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5675         }
5676     }
5677     for(i=1; i<s->avctx->thread_count; i++){
5678         merge_context_after_me(s, s->thread_context[i]);
5679     }
5680     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5681     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5682     emms_c();
5683
5684     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5685         s->pict_type= I_TYPE;
5686         for(i=0; i<s->mb_stride*s->mb_height; i++)
5687             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5688 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5689     }
5690
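    /* choose the smallest f_code/b_code that can represent the estimated motion
       vectors, then clamp or drop vectors that still fall outside the range those
       codes can encode (ff_fix_long_*_mvs). */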
5691     if(!s->umvplus){
5692         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5693             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5694
5695             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5696                 int a,b;
5697                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5698                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5699                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5700             }
5701
5702             ff_fix_long_p_mvs(s);
5703             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5704             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5705                 int j;
5706                 for(i=0; i<2; i++){
5707                     for(j=0; j<2; j++)
5708                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
5709                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5710                 }
5711             }
5712         }
5713
5714         if(s->pict_type==B_TYPE){
5715             int a, b;
5716
5717             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5718             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5719             s->f_code = FFMAX(a, b);
5720
5721             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5722             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5723             s->b_code = FFMAX(a, b);
5724
5725             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5726             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5727             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5728             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5729             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5730                 int dir, j;
5731                 for(dir=0; dir<2; dir++){
5732                     for(i=0; i<2; i++){
5733                         for(j=0; j<2; j++){
5734                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
5735                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5736                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
5737                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5738                         }
5739                     }
5740                 }
5741             }
5742         }
5743     }
5744
5745     if (estimate_qp(s, 0) < 0)
5746         return -1;
5747
5748     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
5749         s->qscale= 3; //reduce clipping problems
5750
5751     if (s->out_format == FMT_MJPEG) {
5752         /* for mjpeg, we do include qscale in the matrix */
5753         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5754         for(i=1;i<64;i++){
5755             int j= s->dsp.idct_permutation[i];
5756
5757             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
5758         }
5759         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
5760                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5761         s->qscale= 8;
5762     }
5763
5764     //FIXME var duplication
5765     s->current_picture_ptr->key_frame=
5766     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5767     s->current_picture_ptr->pict_type=
5768     s->current_picture.pict_type= s->pict_type;
5769
5770     if(s->current_picture.key_frame)
5771         s->picture_in_gop_number=0;
5772
5773     s->last_bits= put_bits_count(&s->pb);
5774     switch(s->out_format) {
5775     case FMT_MJPEG:
5776         if (ENABLE_MJPEG_ENCODER)
5777             ff_mjpeg_encode_picture_header(s);
5778         break;
5779     case FMT_H261:
5780         if (ENABLE_H261_ENCODER)
5781             ff_h261_encode_picture_header(s, picture_number);
5782         break;
5783     case FMT_H263:
5784         if (ENABLE_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
5785             ff_wmv2_encode_picture_header(s, picture_number);
5786         else if (ENABLE_MSMPEG4_ENCODER && s->h263_msmpeg4)
5787             msmpeg4_encode_picture_header(s, picture_number);
5788         else if (s->h263_pred)
5789             mpeg4_encode_picture_header(s, picture_number);
5790         else if (ENABLE_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
5791             rv10_encode_picture_header(s, picture_number);
5792         else if (ENABLE_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
5793             rv20_encode_picture_header(s, picture_number);
5794         else if (s->codec_id == CODEC_ID_FLV1)
5795             ff_flv_encode_picture_header(s, picture_number);
5796         else
5797             h263_encode_picture_header(s, picture_number);
5798         break;
5799     case FMT_MPEG1:
5800         mpeg1_encode_picture_header(s, picture_number);
5801         break;
5802     case FMT_H264:
5803         break;
5804     default:
5805         assert(0);
5806     }
5807     bits= put_bits_count(&s->pb);
5808     s->header_bits= bits - s->last_bits;
5809
5810     for(i=1; i<s->avctx->thread_count; i++){
5811         update_duplicate_context_after_me(s->thread_context[i], s);
5812     }
5813     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5814     for(i=1; i<s->avctx->thread_count; i++){
5815         merge_context_after_encode(s, s->thread_context[i]);
5816     }
5817     emms_c();
5818     return 0;
5819 }
5820
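/* DCT-domain noise reduction: count the blocks, accumulate per-coefficient
   magnitude sums in dct_error_sum[] and pull every coefficient towards zero by
   the adaptive offset in dct_offset[] (derived from those sums by the
   noise_reduction handling elsewhere in this file), so small, noisy
   coefficients are flushed to zero before quantization. */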
5821 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5822     const int intra= s->mb_intra;
5823     int i;
5824
5825     s->dct_count[intra]++;
5826
5827     for(i=0; i<64; i++){
5828         int level= block[i];
5829
5830         if(level){
5831             if(level>0){
5832                 s->dct_error_sum[intra][i] += level;
5833                 level -= s->dct_offset[intra][i];
5834                 if(level<0) level=0;
5835             }else{
5836                 s->dct_error_sum[intra][i] -= level;
5837                 level += s->dct_offset[intra][i];
5838                 if(level>0) level=0;
5839             }
5840             block[i]= level;
5841         }
5842     }
5843 }
5844
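/* Rate-distortion (trellis) quantization: after the forward DCT, each scan
   position keeps up to two candidate levels; a Viterbi-style pass scores every
   (run, level) choice as distortion + lambda * bits, with the bit counts taken
   from the VLC length tables, keeps a survivor list to bound the search, and
   finally backtracks through run_tab[]/level_tab[] to emit the cheapest
   coefficient sequence (including the option of ending the block early). */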
5845 static int dct_quantize_trellis_c(MpegEncContext *s,
5846                         DCTELEM *block, int n,
5847                         int qscale, int *overflow){
5848     const int *qmat;
5849     const uint8_t *scantable= s->intra_scantable.scantable;
5850     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5851     int max=0;
5852     unsigned int threshold1, threshold2;
5853     int bias=0;
5854     int run_tab[65];
5855     int level_tab[65];
5856     int score_tab[65];
5857     int survivor[65];
5858     int survivor_count;
5859     int last_run=0;
5860     int last_level=0;
5861     int last_score= 0;
5862     int last_i;
5863     int coeff[2][64];
5864     int coeff_count[64];
5865     int qmul, qadd, start_i, last_non_zero, i, dc;
5866     const int esc_length= s->ac_esc_length;
5867     uint8_t * length;
5868     uint8_t * last_length;
5869     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5870
5871     s->dsp.fdct (block);
5872
5873     if(s->dct_error_sum)
5874         s->denoise_dct(s, block);
5875     qmul= qscale*16;
5876     qadd= ((qscale-1)|1)*8;
5877
5878     if (s->mb_intra) {
5879         int q;
5880         if (!s->h263_aic) {
5881             if (n < 4)
5882                 q = s->y_dc_scale;
5883             else
5884                 q = s->c_dc_scale;
5885             q = q << 3;
5886         } else{
5887             /* For AIC we skip quant/dequant of INTRADC */
5888             q = 1 << 3;
5889             qadd=0;
5890         }
5891
5892         /* note: block[0] is assumed to be positive */
5893         block[0] = (block[0] + (q >> 1)) / q;
5894         start_i = 1;
5895         last_non_zero = 0;
5896         qmat = s->q_intra_matrix[qscale];
5897         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5898             bias= 1<<(QMAT_SHIFT-1);
5899         length     = s->intra_ac_vlc_length;
5900         last_length= s->intra_ac_vlc_last_length;
5901     } else {
5902         start_i = 0;
5903         last_non_zero = -1;
5904         qmat = s->q_inter_matrix[qscale];
5905         length     = s->inter_ac_vlc_length;
5906         last_length= s->inter_ac_vlc_last_length;
5907     }
5908     last_i= start_i;
5909
5910     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5911     threshold2= (threshold1<<1);
5912
5913     for(i=63; i>=start_i; i--) {
5914         const int j = scantable[i];
5915         int level = block[j] * qmat[j];
5916
5917         if(((unsigned)(level+threshold1))>threshold2){
5918             last_non_zero = i;
5919             break;
5920         }
5921     }
5922
5923     for(i=start_i; i<=last_non_zero; i++) {
5924         const int j = scantable[i];
5925         int level = block[j] * qmat[j];
5926
5927 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5928 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5929         if(((unsigned)(level+threshold1))>threshold2){
5930             if(level>0){
5931                 level= (bias + level)>>QMAT_SHIFT;
5932                 coeff[0][i]= level;
5933                 coeff[1][i]= level-1;
5934 //                coeff[2][k]= level-2;
5935             }else{
5936                 level= (bias - level)>>QMAT_SHIFT;
5937                 coeff[0][i]= -level;
5938                 coeff[1][i]= -level+1;
5939 //                coeff[2][k]= -level+2;
5940             }
5941             coeff_count[i]= FFMIN(level, 2);
5942             assert(coeff_count[i]);
5943             max |=level;
5944         }else{
5945             coeff[0][i]= (level>>31)|1;
5946             coeff_count[i]= 1;
5947         }
5948     }
5949
5950     *overflow= s->max_qcoeff < max; //overflow might have happened
5951
5952     if(last_non_zero < start_i){
5953         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5954         return last_non_zero;
5955     }
5956
5957     score_tab[start_i]= 0;
5958     survivor[0]= start_i;
5959     survivor_count= 1;
5960
5961     for(i=start_i; i<=last_non_zero; i++){
5962         int level_index, j;
5963         const int dct_coeff= FFABS(block[ scantable[i] ]);
5964         const int zero_distortion= dct_coeff*dct_coeff;
5965         int best_score=256*256*256*120;
5966         for(level_index=0; level_index < coeff_count[i]; level_index++){
5967             int distortion;
5968             int level= coeff[level_index][i];
5969             const int alevel= FFABS(level);
5970             int unquant_coeff;
5971
5972             assert(level);
5973
5974             if(s->out_format == FMT_H263){
5975                 unquant_coeff= alevel*qmul + qadd;
5976             }else{ //MPEG1
5977                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5978                 if(s->mb_intra){
5979                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5980                         unquant_coeff =   (unquant_coeff - 1) | 1;
5981                 }else{
5982                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5983                         unquant_coeff =   (unquant_coeff - 1) | 1;
5984                 }
5985                 unquant_coeff<<= 3;
5986             }
5987
5988             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
5989             level+=64;
5990             if((level&(~127)) == 0){
5991                 for(j=survivor_count-1; j>=0; j--){
5992                     int run= i - survivor[j];
5993                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5994                     score += score_tab[i-run];
5995
5996                     if(score < best_score){
5997                         best_score= score;
5998                         run_tab[i+1]= run;
5999                         level_tab[i+1]= level-64;
6000                     }
6001                 }
6002
6003                 if(s->out_format == FMT_H263){
6004                     for(j=survivor_count-1; j>=0; j--){
6005                         int run= i - survivor[j];
6006                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
6007                         score += score_tab[i-run];
6008                         if(score < last_score){
6009                             last_score= score;
6010                             last_run= run;
6011                             last_level= level-64;
6012                             last_i= i+1;
6013                         }
6014                     }
6015                 }
6016             }else{
6017                 distortion += esc_length*lambda;
6018                 for(j=survivor_count-1; j>=0; j--){
6019                     int run= i - survivor[j];
6020                     int score= distortion + score_tab[i-run];
6021
6022                     if(score < best_score){
6023                         best_score= score;
6024                         run_tab[i+1]= run;
6025                         level_tab[i+1]= level-64;
6026                     }
6027                 }
6028
6029                 if(s->out_format == FMT_H263){
6030                   for(j=survivor_count-1; j>=0; j--){
6031                         int run= i - survivor[j];
6032                         int score= distortion + score_tab[i-run];
6033                         if(score < last_score){
6034                             last_score= score;
6035                             last_run= run;
6036                             last_level= level-64;
6037                             last_i= i+1;
6038                         }
6039                     }
6040                 }
6041             }
6042         }
6043
6044         score_tab[i+1]= best_score;
6045
6046         //Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
6047         if(last_non_zero <= 27){
6048             for(; survivor_count; survivor_count--){
6049                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
6050                     break;
6051             }
6052         }else{
6053             for(; survivor_count; survivor_count--){
6054                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
6055                     break;
6056             }
6057         }
6058
6059         survivor[ survivor_count++ ]= i+1;
6060     }
6061
6062     if(s->out_format != FMT_H263){
6063         last_score= 256*256*256*120;
6064         for(i= survivor[0]; i<=last_non_zero + 1; i++){
6065             int score= score_tab[i];
6066             if(i) score += lambda*2; //FIXME be more exact?
6067
6068             if(score < last_score){
6069                 last_score= score;
6070                 last_i= i;
6071                 last_level= level_tab[i];
6072                 last_run= run_tab[i];
6073             }
6074         }
6075     }
6076
6077     s->coded_score[n] = last_score;
6078
6079     dc= FFABS(block[0]);
6080     last_non_zero= last_i - 1;
6081     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
6082
6083     if(last_non_zero < start_i)
6084         return last_non_zero;
6085
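    /* special case: only the very first coefficient survived; compare the cost of
       dropping the block entirely (distortion dc*dc) against coding each candidate
       level, and return -1 if dropping it wins. */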
6086     if(last_non_zero == 0 && start_i == 0){
6087         int best_level= 0;
6088         int best_score= dc * dc;
6089
6090         for(i=0; i<coeff_count[0]; i++){
6091             int level= coeff[i][0];
6092             int alevel= FFABS(level);
6093             int unquant_coeff, score, distortion;
6094
6095             if(s->out_format == FMT_H263){
6096                     unquant_coeff= (alevel*qmul + qadd)>>3;
6097             }else{ //MPEG1
6098                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
6099                     unquant_coeff =   (unquant_coeff - 1) | 1;
6100             }
6101             unquant_coeff = (unquant_coeff + 4) >> 3;
6102             unquant_coeff<<= 3 + 3;
6103
6104             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
6105             level+=64;
6106             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
6107             else                    score= distortion + esc_length*lambda;
6108
6109             if(score < best_score){
6110                 best_score= score;
6111                 best_level= level - 64;
6112             }
6113         }
6114         block[0]= best_level;
6115         s->coded_score[n] = best_score - dc*dc;
6116         if(best_level == 0) return -1;
6117         else                return last_non_zero;
6118     }
6119
6120     i= last_i;
6121     assert(last_level);
6122
6123     block[ perm_scantable[last_non_zero] ]= last_level;
6124     i -= last_run + 1;
6125
6126     for(; i>start_i; i -= run_tab[i] + 1){
6127         block[ perm_scantable[i-1] ]= level_tab[i];
6128     }
6129
6130     return last_non_zero;
6131 }
6132
6133 //#define REFINE_STATS 1
6134 static int16_t basis[64][64];
6135
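/* build_basis() fills basis[][] with the 64 two-dimensional DCT basis patterns
   (scaled by 1<<BASIS_SHIFT, with the usual sqrt(1/2) factors on the first row
   and column), indexed through the IDCT permutation; dct_quantize_refine() uses
   them to measure, in the spatial domain, the effect of changing a single
   transform coefficient. */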
6136 static void build_basis(uint8_t *perm){
6137     int i, j, x, y;
6138     emms_c();
6139     for(i=0; i<8; i++){
6140         for(j=0; j<8; j++){
6141             for(y=0; y<8; y++){
6142                 for(x=0; x<8; x++){
6143                     double s= 0.25*(1<<BASIS_SHIFT);
6144                     int index= 8*i + j;
6145                     int perm_index= perm[index];
6146                     if(i==0) s*= sqrt(0.5);
6147                     if(j==0) s*= sqrt(0.5);
6148                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6149                 }
6150             }
6151         }
6152     }
6153 }
6154
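/* Greedy refinement used for quantizer_noise_shaping: starting from the already
   quantized block it repeatedly tries changing one coefficient by +/-1, scores
   the change as the VLC bit-cost difference plus the spatial-domain distortion
   change measured with try_8x8basis() against the residual in rem[], applies
   the best improvement via add_8x8basis(), and stops once no single change
   helps any more. */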
6155 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
6156                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
6157                         int n, int qscale){
6158     int16_t rem[64];
6159     DECLARE_ALIGNED_16(DCTELEM, d1[64]);
6160     const int *qmat;
6161     const uint8_t *scantable= s->intra_scantable.scantable;
6162     const uint8_t *perm_scantable= s->intra_scantable.permutated;
6163 //    unsigned int threshold1, threshold2;
6164 //    int bias=0;
6165     int run_tab[65];
6166     int prev_run=0;
6167     int prev_level=0;
6168     int qmul, qadd, start_i, last_non_zero, i, dc;
6169     uint8_t * length;
6170     uint8_t * last_length;
6171     int lambda;
6172     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
6173 #ifdef REFINE_STATS
6174 static int count=0;
6175 static int after_last=0;
6176 static int to_zero=0;
6177 static int from_zero=0;
6178 static int raise=0;
6179 static int lower=0;
6180 static int messed_sign=0;
6181 #endif
6182
6183     if(basis[0][0] == 0)
6184         build_basis(s->dsp.idct_permutation);
6185
6186     qmul= qscale*2;
6187     qadd= (qscale-1)|1;
6188     if (s->mb_intra) {
6189         if (!s->h263_aic) {
6190             if (n < 4)
6191                 q = s->y_dc_scale;
6192             else
6193                 q = s->c_dc_scale;
6194         } else{
6195             /* For AIC we skip quant/dequant of INTRADC */
6196             q = 1;
6197             qadd=0;
6198         }
6199         q <<= RECON_SHIFT-3;
6200         /* note: block[0] is assumed to be positive */
6201         dc= block[0]*q;
6202 //        block[0] = (block[0] + (q >> 1)) / q;
6203         start_i = 1;
6204         qmat = s->q_intra_matrix[qscale];
6205 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
6206 //            bias= 1<<(QMAT_SHIFT-1);
6207         length     = s->intra_ac_vlc_length;
6208         last_length= s->intra_ac_vlc_last_length;
6209     } else {
6210         dc= 0;
6211         start_i = 0;
6212         qmat = s->q_inter_matrix[qscale];
6213         length     = s->inter_ac_vlc_length;
6214         last_length= s->inter_ac_vlc_last_length;
6215     }
6216     last_non_zero = s->block_last_index[n];
6217
6218 #ifdef REFINE_STATS
6219 {START_TIMER
6220 #endif
6221     dc += (1<<(RECON_SHIFT-1));
6222     for(i=0; i<64; i++){
6223         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
6224     }
6225 #ifdef REFINE_STATS
6226 STOP_TIMER("memset rem[]")}
6227 #endif
6228     sum=0;
6229     for(i=0; i<64; i++){
6230         int one= 36;
6231         int qns=4;
6232         int w;
6233
6234         w= FFABS(weight[i]) + qns*one;
6235         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6236
6237         weight[i] = w;
6238 //        w=weight[i] = (63*qns + (w/2)) / w;
6239
6240         assert(w>0);
6241         assert(w<(1<<6));
6242         sum += w*w;
6243     }
6244     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6245 #ifdef REFINE_STATS
6246 {START_TIMER
6247 #endif
6248     run=0;
6249     rle_index=0;
6250     for(i=start_i; i<=last_non_zero; i++){
6251         int j= perm_scantable[i];
6252         const int level= block[j];
6253         int coeff;
6254
6255         if(level){
6256             if(level<0) coeff= qmul*level - qadd;
6257             else        coeff= qmul*level + qadd;
6258             run_tab[rle_index++]=run;
6259             run=0;
6260
6261             s->dsp.add_8x8basis(rem, basis[j], coeff);
6262         }else{
6263             run++;
6264         }
6265     }
6266 #ifdef REFINE_STATS
6267 if(last_non_zero>0){
6268 STOP_TIMER("init rem[]")
6269 }
6270 }
6271
6272 {START_TIMER
6273 #endif
6274     for(;;){
6275         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6276         int best_coeff=0;
6277         int best_change=0;
6278         int run2, best_unquant_change=0, analyze_gradient;
6279 #ifdef REFINE_STATS
6280 {START_TIMER
6281 #endif
6282         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6283
6284         if(analyze_gradient){
6285 #ifdef REFINE_STATS
6286 {START_TIMER
6287 #endif
6288             for(i=0; i<64; i++){
6289                 int w= weight[i];
6290
6291                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6292             }
6293 #ifdef REFINE_STATS
6294 STOP_TIMER("rem*w*w")}
6295 {START_TIMER
6296 #endif
6297             s->dsp.fdct(d1);
6298 #ifdef REFINE_STATS
6299 STOP_TIMER("dct")}
6300 #endif
6301         }
6302
6303         if(start_i){
6304             const int level= block[0];
6305             int change, old_coeff;
6306
6307             assert(s->mb_intra);
6308
6309             old_coeff= q*level;
6310
6311             for(change=-1; change<=1; change+=2){
6312                 int new_level= level + change;
6313                 int score, new_coeff;
6314
6315                 new_coeff= q*new_level;
6316                 if(new_coeff >= 2048 || new_coeff < 0)
6317                     continue;
6318
6319                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6320                 if(score<best_score){
6321                     best_score= score;
6322                     best_coeff= 0;
6323                     best_change= change;
6324                     best_unquant_change= new_coeff - old_coeff;
6325                 }
6326             }
6327         }
6328
6329         run=0;
6330         rle_index=0;
6331         run2= run_tab[rle_index++];
6332         prev_level=0;
6333         prev_run=0;
6334
6335         for(i=start_i; i<64; i++){
6336             int j= perm_scantable[i];
6337             const int level= block[j];
6338             int change, old_coeff;
6339
6340             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6341                 break;
6342
6343             if(level){
6344                 if(level<0) old_coeff= qmul*level - qadd;
6345                 else        old_coeff= qmul*level + qadd;
6346                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6347             }else{
6348                 old_coeff=0;
6349                 run2--;
6350                 assert(run2>=0 || i >= last_non_zero );
6351             }
6352
6353             for(change=-1; change<=1; change+=2){
6354                 int new_level= level + change;
6355                 int score, new_coeff, unquant_change;
6356
6357                 score=0;
6358                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
6359                    continue;
6360
6361                 if(new_level){
6362                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6363                     else            new_coeff= qmul*new_level + qadd;
6364                     if(new_coeff >= 2048 || new_coeff <= -2048)
6365                         continue;
6366                     //FIXME check for overflow
6367
6368                     if(level){
6369                         if(level < 63 && level > -63){
6370                             if(i < last_non_zero)
6371                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6372                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6373                             else
6374                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6375                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6376                         }
6377                     }else{
6378                         assert(FFABS(new_level)==1);
6379
6380                         if(analyze_gradient){
6381                             int g= d1[ scantable[i] ];
6382                             if(g && (g^new_level) >= 0)
6383                                 continue;
6384                         }
6385
6386                         if(i < last_non_zero){
6387                             int next_i= i + run2 + 1;
6388                             int next_level= block[ perm_scantable[next_i] ] + 64;
6389
6390                             if(next_level&(~127))
6391                                 next_level= 0;
6392
6393                             if(next_i < last_non_zero)
6394                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6395                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6396                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6397                             else
6398                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6399                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6400                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6401                         }else{
6402                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6403                             if(prev_level){
6404                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6405                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6406                             }
6407                         }
6408                     }
6409                 }else{
6410                     new_coeff=0;
6411                     assert(FFABS(level)==1);
6412
6413                     if(i < last_non_zero){
6414                         int next_i= i + run2 + 1;
6415                         int next_level= block[ perm_scantable[next_i] ] + 64;
6416
6417                         if(next_level&(~127))
6418                             next_level= 0;
6419
6420                         if(next_i < last_non_zero)
6421                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6422                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6423                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6424                         else
6425                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6426                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6427                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6428                     }else{
6429                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6430                         if(prev_level){
6431                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6432                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6433                         }
6434                     }
6435                 }
6436
6437                 score *= lambda;
6438
6439                 unquant_change= new_coeff - old_coeff;
6440                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6441
6442                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6443                 if(score<best_score){
6444                     best_score= score;
6445                     best_coeff= i;
6446                     best_change= change;
6447                     best_unquant_change= unquant_change;
6448                 }
6449             }
6450             if(level){
6451                 prev_level= level + 64;
6452                 if(prev_level&(~127))
6453                     prev_level= 0;
6454                 prev_run= run;
6455                 run=0;
6456             }else{
6457                 run++;
6458             }
6459         }
6460 #ifdef REFINE_STATS
6461 STOP_TIMER("iterative step")}
6462 #endif
6463
6464         if(best_change){
6465             int j= perm_scantable[ best_coeff ];
6466
6467             block[j] += best_change;
6468
6469             if(best_coeff > last_non_zero){
6470                 last_non_zero= best_coeff;
6471                 assert(block[j]);
6472 #ifdef REFINE_STATS
6473 after_last++;
6474 #endif
6475             }else{
6476 #ifdef REFINE_STATS
6477 if(block[j]){
6478     if(block[j] - best_change){
6479         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
6480             raise++;
6481         }else{
6482             lower++;
6483         }
6484     }else{
6485         from_zero++;
6486     }
6487 }else{
6488     to_zero++;
6489 }
6490 #endif
6491                 for(; last_non_zero>=start_i; last_non_zero--){
6492                     if(block[perm_scantable[last_non_zero]])
6493                         break;
6494                 }
6495             }
6496 #ifdef REFINE_STATS
6497 count++;
6498 if(256*256*256*64 % count == 0){
6499     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6500 }
6501 #endif
6502             run=0;
6503             rle_index=0;
6504             for(i=start_i; i<=last_non_zero; i++){
6505                 int j= perm_scantable[i];
6506                 const int level= block[j];
6507
6508                  if(level){
6509                      run_tab[rle_index++]=run;
6510                      run=0;
6511                  }else{
6512                      run++;
6513                  }
6514             }
6515
6516             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6517         }else{
6518             break;
6519         }
6520     }
6521 #ifdef REFINE_STATS
6522 if(last_non_zero>0){
6523 STOP_TIMER("iterative search")
6524 }
6525 }
6526 #endif
6527
6528     return last_non_zero;
6529 }
6530
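/* Plain C quantizer: forward DCT, optional DCT-domain denoising, special-cased
   DC for intra blocks, then each coefficient in scan order is quantized as
   (|coeff| * qmat + bias) >> QMAT_SHIFT with a threshold test that zeroes the
   small ones; returns the index of the last nonzero coefficient and reports a
   possible overflow through *overflow. */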
6531 static int dct_quantize_c(MpegEncContext *s,
6532                         DCTELEM *block, int n,
6533                         int qscale, int *overflow)
6534 {
6535     int i, j, level, last_non_zero, q, start_i;
6536     const int *qmat;
6537     const uint8_t *scantable= s->intra_scantable.scantable;
6538     int bias;
6539     int max=0;
6540     unsigned int threshold1, threshold2;
6541
6542     s->dsp.fdct (block);
6543
6544     if(s->dct_error_sum)
6545         s->denoise_dct(s, block);
6546
6547     if (s->mb_intra) {
6548         if (!s->h263_aic) {
6549             if (n < 4)
6550                 q = s->y_dc_scale;
6551             else
6552                 q = s->c_dc_scale;
6553             q = q << 3;
6554         } else
6555             /* For AIC we skip quant/dequant of INTRADC */
6556             q = 1 << 3;
6557
6558         /* note: block[0] is assumed to be positive */
6559         block[0] = (block[0] + (q >> 1)) / q;
6560         start_i = 1;
6561         last_non_zero = 0;
6562         qmat = s->q_intra_matrix[qscale];
6563         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6564     } else {
6565         start_i = 0;
6566         last_non_zero = -1;
6567         qmat = s->q_inter_matrix[qscale];
6568         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6569     }
6570     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6571     threshold2= (threshold1<<1);
6572     for(i=63;i>=start_i;i--) {
6573         j = scantable[i];
6574         level = block[j] * qmat[j];
6575
6576         if(((unsigned)(level+threshold1))>threshold2){
6577             last_non_zero = i;
6578             break;
6579         }else{
6580             block[j]=0;
6581         }
6582     }
6583     for(i=start_i; i<=last_non_zero; i++) {
6584         j = scantable[i];
6585         level = block[j] * qmat[j];
6586
6587 //        if(   bias+level >= (1<<QMAT_SHIFT)
6588 //           || bias-level >= (1<<QMAT_SHIFT)){
6589         if(((unsigned)(level+threshold1))>threshold2){
6590             if(level>0){
6591                 level= (bias + level)>>QMAT_SHIFT;
6592                 block[j]= level;
6593             }else{
6594                 level= (bias - level)>>QMAT_SHIFT;
6595                 block[j]= -level;
6596             }
6597             max |=level;
6598         }else{
6599             block[j]=0;
6600         }
6601     }
6602     *overflow= s->max_qcoeff < max; //overflow might have happened
6603
6604     /* we need this permutation so that the IDCT is correct; only the nonzero elements are permuted */
6605     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6606         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6607
6608     return last_non_zero;
6609 }
6610
6611 #endif //CONFIG_ENCODERS
6612
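/* The dct_unquantize_* functions below invert the quantizers above.  The MPEG-1
   variants force every reconstructed level to be odd ((level - 1) | 1), the
   classic mismatch-control trick; the MPEG-2 inter and bit-exact intra variants
   instead toggle the least significant bit of block[63] according to the parity
   of the coefficient sum; the H.263 variants use the uniform reconstruction
   level * 2*qscale +/- ((qscale - 1) | 1). */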
6613 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6614                                    DCTELEM *block, int n, int qscale)
6615 {
6616     int i, level, nCoeffs;
6617     const uint16_t *quant_matrix;
6618
6619     nCoeffs= s->block_last_index[n];
6620
6621     if (n < 4)
6622         block[0] = block[0] * s->y_dc_scale;
6623     else
6624         block[0] = block[0] * s->c_dc_scale;
6625     /* XXX: only mpeg1 */
6626     quant_matrix = s->intra_matrix;
6627     for(i=1;i<=nCoeffs;i++) {
6628         int j= s->intra_scantable.permutated[i];
6629         level = block[j];
6630         if (level) {
6631             if (level < 0) {
6632                 level = -level;
6633                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6634                 level = (level - 1) | 1;
6635                 level = -level;
6636             } else {
6637                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6638                 level = (level - 1) | 1;
6639             }
6640             block[j] = level;
6641         }
6642     }
6643 }
6644
6645 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6646                                    DCTELEM *block, int n, int qscale)
6647 {
6648     int i, level, nCoeffs;
6649     const uint16_t *quant_matrix;
6650
6651     nCoeffs= s->block_last_index[n];
6652
6653     quant_matrix = s->inter_matrix;
6654     for(i=0; i<=nCoeffs; i++) {
6655         int j= s->intra_scantable.permutated[i];
6656         level = block[j];
6657         if (level) {
6658             if (level < 0) {
6659                 level = -level;
6660                 level = (((level << 1) + 1) * qscale *
6661                          ((int) (quant_matrix[j]))) >> 4;
6662                 level = (level - 1) | 1;
6663                 level = -level;
6664             } else {
6665                 level = (((level << 1) + 1) * qscale *
6666                          ((int) (quant_matrix[j]))) >> 4;
6667                 level = (level - 1) | 1;
6668             }
6669             block[j] = level;
6670         }
6671     }
6672 }
6673
6674 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6675                                    DCTELEM *block, int n, int qscale)
6676 {
6677     int i, level, nCoeffs;
6678     const uint16_t *quant_matrix;
6679
6680     if(s->alternate_scan) nCoeffs= 63;
6681     else nCoeffs= s->block_last_index[n];
6682
6683     if (n < 4)
6684         block[0] = block[0] * s->y_dc_scale;
6685     else
6686         block[0] = block[0] * s->c_dc_scale;
6687     quant_matrix = s->intra_matrix;
6688     for(i=1;i<=nCoeffs;i++) {
6689         int j= s->intra_scantable.permutated[i];
6690         level = block[j];
6691         if (level) {
6692             if (level < 0) {
6693                 level = -level;
6694                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6695                 level = -level;
6696             } else {
6697                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6698             }
6699             block[j] = level;
6700         }
6701     }
6702 }
6703
6704 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6705                                    DCTELEM *block, int n, int qscale)
6706 {
6707     int i, level, nCoeffs;
6708     const uint16_t *quant_matrix;
6709     int sum=-1;
6710
6711     if(s->alternate_scan) nCoeffs= 63;
6712     else nCoeffs= s->block_last_index[n];
6713
6714     if (n < 4)
6715         block[0] = block[0] * s->y_dc_scale;
6716     else
6717         block[0] = block[0] * s->c_dc_scale;
6718     quant_matrix = s->intra_matrix;
6719     for(i=1;i<=nCoeffs;i++) {
6720         int j= s->intra_scantable.permutated[i];
6721         level = block[j];
6722         if (level) {
6723             if (level < 0) {
6724                 level = -level;
6725                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6726                 level = -level;
6727             } else {
6728                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6729             }
6730             block[j] = level;
6731             sum+=level;
6732         }
6733     }
6734     block[63]^=sum&1; /* MPEG-2 mismatch control */
6735 }
6736
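/**
 * MPEG-2 inter dequantization:
 *   |level'| = ((2*|level| + 1) * qscale * quant_matrix[j]) >> 4, sign
 * restored, with the MPEG-2 mismatch control applied on block[63].
 */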
6737 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6738                                    DCTELEM *block, int n, int qscale)
6739 {
6740     int i, level, nCoeffs;
6741     const uint16_t *quant_matrix;
6742     int sum=-1;
6743
6744     if(s->alternate_scan) nCoeffs= 63;
6745     else nCoeffs= s->block_last_index[n];
6746
6747     quant_matrix = s->inter_matrix;
6748     for(i=0; i<=nCoeffs; i++) {
6749         int j= s->intra_scantable.permutated[i];
6750         level = block[j];
6751         if (level) {
6752             if (level < 0) {
6753                 level = -level;
6754                 level = (((level << 1) + 1) * qscale *
6755                          ((int) (quant_matrix[j]))) >> 4;
6756                 level = -level;
6757             } else {
6758                 level = (((level << 1) + 1) * qscale *
6759                          ((int) (quant_matrix[j]))) >> 4;
6760             }
6761             block[j] = level;
6762             sum+=level;
6763         }
6764     }
6765     block[63]^=sum&1; /* MPEG-2 mismatch control */
6766 }
6767
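/**
 * H.263 / MPEG-4 intra dequantization:
 *   level' = level * 2*qscale +/- qadd,  qadd = (qscale - 1) | 1
 * (qadd is 0 with advanced intra coding). With AC prediction all 63 AC
 * positions are processed, since predicted coefficients may lie beyond
 * block_last_index.
 */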
6768 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6769                                   DCTELEM *block, int n, int qscale)
6770 {
6771     int i, level, qmul, qadd;
6772     int nCoeffs;
6773
6774     assert(s->block_last_index[n]>=0);
6775
6776     qmul = qscale << 1;
6777
6778     if (!s->h263_aic) {
6779         if (n < 4)
6780             block[0] = block[0] * s->y_dc_scale;
6781         else
6782             block[0] = block[0] * s->c_dc_scale;
6783         qadd = (qscale - 1) | 1;   /* qscale rounded down to an odd value */
6784     }else{
6785         qadd = 0;
6786     }
6787     if(s->ac_pred)
6788         nCoeffs=63;
6789     else
6790         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6791
6792     for(i=1; i<=nCoeffs; i++) {
6793         level = block[i];
6794         if (level) {
6795             if (level < 0) {
6796                 level = level * qmul - qadd;
6797             } else {
6798                 level = level * qmul + qadd;
6799             }
6800             block[i] = level;
6801         }
6802     }
6803 }
6804
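/**
 * H.263 / MPEG-4 inter dequantization: same qmul/qadd scheme as the intra
 * case, but starting at coefficient 0 and always bounded by block_last_index.
 */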
6805 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6806                                   DCTELEM *block, int n, int qscale)
6807 {
6808     int i, level, qmul, qadd;
6809     int nCoeffs;
6810
6811     assert(s->block_last_index[n]>=0);
6812
6813     qadd = (qscale - 1) | 1;
6814     qmul = qscale << 1;
6815
6816     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6817
6818     for(i=0; i<=nCoeffs; i++) {
6819         level = block[i];
6820         if (level) {
6821             if (level < 0) {
6822                 level = level * qmul - qadd;
6823             } else {
6824                 level = level * qmul + qadd;
6825             }
6826             block[i] = level;
6827         }
6828     }
6829 }
6830
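/* AVCodec declarations for the encoders that share the generic MPV_* encode
   path; they are picked up by the codec registration code in allcodecs.c.
   The MPEG-4 encoder additionally sets CODEC_CAP_DELAY because B-frames make
   its output lag behind the input frames. */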
6831 #ifdef CONFIG_ENCODERS
6832 AVCodec h263_encoder = {
6833     "h263",
6834     CODEC_TYPE_VIDEO,
6835     CODEC_ID_H263,
6836     sizeof(MpegEncContext),
6837     MPV_encode_init,
6838     MPV_encode_picture,
6839     MPV_encode_end,
6840     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6841 };
6842
6843 AVCodec h263p_encoder = {
6844     "h263p",
6845     CODEC_TYPE_VIDEO,
6846     CODEC_ID_H263P,
6847     sizeof(MpegEncContext),
6848     MPV_encode_init,
6849     MPV_encode_picture,
6850     MPV_encode_end,
6851     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6852 };
6853
6854 AVCodec flv_encoder = {
6855     "flv",
6856     CODEC_TYPE_VIDEO,
6857     CODEC_ID_FLV1,
6858     sizeof(MpegEncContext),
6859     MPV_encode_init,
6860     MPV_encode_picture,
6861     MPV_encode_end,
6862     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6863 };
6864
6865 AVCodec rv10_encoder = {
6866     "rv10",
6867     CODEC_TYPE_VIDEO,
6868     CODEC_ID_RV10,
6869     sizeof(MpegEncContext),
6870     MPV_encode_init,
6871     MPV_encode_picture,
6872     MPV_encode_end,
6873     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6874 };
6875
6876 AVCodec rv20_encoder = {
6877     "rv20",
6878     CODEC_TYPE_VIDEO,
6879     CODEC_ID_RV20,
6880     sizeof(MpegEncContext),
6881     MPV_encode_init,
6882     MPV_encode_picture,
6883     MPV_encode_end,
6884     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6885 };
6886
6887 AVCodec mpeg4_encoder = {
6888     "mpeg4",
6889     CODEC_TYPE_VIDEO,
6890     CODEC_ID_MPEG4,
6891     sizeof(MpegEncContext),
6892     MPV_encode_init,
6893     MPV_encode_picture,
6894     MPV_encode_end,
6895     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6896     .capabilities= CODEC_CAP_DELAY,
6897 };
6898
6899 AVCodec msmpeg4v1_encoder = {
6900     "msmpeg4v1",
6901     CODEC_TYPE_VIDEO,
6902     CODEC_ID_MSMPEG4V1,
6903     sizeof(MpegEncContext),
6904     MPV_encode_init,
6905     MPV_encode_picture,
6906     MPV_encode_end,
6907     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6908 };
6909
6910 AVCodec msmpeg4v2_encoder = {
6911     "msmpeg4v2",
6912     CODEC_TYPE_VIDEO,
6913     CODEC_ID_MSMPEG4V2,
6914     sizeof(MpegEncContext),
6915     MPV_encode_init,
6916     MPV_encode_picture,
6917     MPV_encode_end,
6918     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6919 };
6920
6921 AVCodec msmpeg4v3_encoder = {
6922     "msmpeg4",
6923     CODEC_TYPE_VIDEO,
6924     CODEC_ID_MSMPEG4V3,
6925     sizeof(MpegEncContext),
6926     MPV_encode_init,
6927     MPV_encode_picture,
6928     MPV_encode_end,
6929     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6930 };
6931
6932 AVCodec wmv1_encoder = {
6933     "wmv1",
6934     CODEC_TYPE_VIDEO,
6935     CODEC_ID_WMV1,
6936     sizeof(MpegEncContext),
6937     MPV_encode_init,
6938     MPV_encode_picture,
6939     MPV_encode_end,
6940     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6941 };
6942
6943 #endif //CONFIG_ENCODERS