]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
30bcf6053d8305811926b134744ff604ba047bba
[ffmpeg] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  *
22  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
23  */
24
25 /**
26  * @file mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "msmpeg4.h"
34 #include "faandct.h"
35 #include <limits.h>
36
37 #ifdef USE_FASTMEMCPY
38 #include "libvo/fastmemcpy.h"
39 #endif
40
41 //#undef NDEBUG
42 //#include <assert.h>
43
44 #ifdef CONFIG_ENCODERS
45 static int encode_picture(MpegEncContext *s, int picture_number);
46 #endif //CONFIG_ENCODERS
47 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
48                                    DCTELEM *block, int n, int qscale);
49 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
50                                    DCTELEM *block, int n, int qscale);
51 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
52                                    DCTELEM *block, int n, int qscale);
53 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
54                                    DCTELEM *block, int n, int qscale);
55 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
56                                    DCTELEM *block, int n, int qscale);
57 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
58                                   DCTELEM *block, int n, int qscale);
59 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
60                                   DCTELEM *block, int n, int qscale);
61 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
62 #ifdef CONFIG_ENCODERS
63 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
64 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
65 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
66 static int sse_mb(MpegEncContext *s);
67 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
68 #endif //CONFIG_ENCODERS
69
70 #ifdef HAVE_XVMC
71 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
72 extern void XVMC_field_end(MpegEncContext *s);
73 extern void XVMC_decode_mb(MpegEncContext *s);
74 #endif
75
76 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
77
78
79 /* enable all paranoid tests for rounding, overflows, etc... */
80 //#define PARANOID
81
82 //#define DEBUG
83
84
85 /* for jpeg fast DCT */
86 #define CONST_BITS 14
87
/* postscale factors of the AAN fast DCT, in natural (row-major) order,
   as 2.14 fixed point; folded into the quantizer by convert_matrix()
   when fdct_ifast (or non-postscaled faandct) is in use */
static const uint16_t aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};

/* rounding table for h263 chroma motion vector scaling,
   indexed by the low 4 bits of the summed luma vectors */
static const uint8_t h263_chroma_roundtab[16] = {
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};

/* identity luma->chroma qscale mapping: by default chroma is quantized
   with the same qscale as luma */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
109
110 #ifdef CONFIG_ENCODERS
/* encoder-wide shared tables; default_fcode_tab is filled in
   MPV_encode_defaults() (mv_penalty presumably elsewhere — not visible here) */
static uint8_t default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
static uint8_t default_fcode_tab[MAX_MV*2+1];

/* default pixel format list: planar YUV 4:2:0 only, -1 terminated */
enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
115
/**
 * Builds the per-qscale quantizer multiplier tables from a quant matrix.
 * For every qscale in [qmin,qmax], qmat[qscale][i] becomes a fixed-point
 * reciprocal of qscale*quant_matrix, stored in the fDCT coefficient order
 * given by dsp->idct_permutation.  Which scaling is folded in depends on
 * the selected fDCT (the AAN variants leave their postscale factors in
 * the output).  qmat16 (reciprocal + bias, for the 16-bit/MMX quantizer)
 * is only filled in the generic-fDCT branch.
 * Finally warns if level*multiplier may overflow 32 bits at QMAT_SHIFT.
 * @param bias  quantizer rounding bias in QUANT_BIAS_SHIFT fixed point
 * @param intra nonzero skips the DC coefficient in the overflow check
 */
static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                           const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
{
    int qscale;
    int shift=0;

    for(qscale=qmin; qscale<=qmax; qscale++){
        int i;
        if (dsp->fdct == ff_jpeg_fdct_islow
#ifdef FAAN_POSTSCALE
            || dsp->fdct == ff_faandct
#endif
            ) {
            /* fDCT output carries no extra scale: plain reciprocal */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */

                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
                                (qscale * quant_matrix[j]));
            }
        } else if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
            /* ifast leaves the AAN postscale (2.14 fixed point) in its
               output, so fold its reciprocal into the quantizer */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
                                (aanscales[i] * qscale * quant_matrix[j]));
            }
        } else {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                   So 16           <= qscale * quant_matrix[i]             <= 7905
                   so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
                   so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
                */
                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
//                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);

                /* keep the 16-bit reciprocal inside (0, 0x8000) so the
                   signed 16-bit multiply in the asm cannot misbehave */
                if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
                qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
            }
        }

        /* determine by how much QMAT_SHIFT would have to shrink for
           max_level * qmat to stay within INT_MAX (DC skipped if intra) */
        for(i=intra; i<64; i++){
            int64_t max= 8191;
            if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
                max= (8191LL*aanscales[i]) >> 14;
            }
            while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
                shift++;
            }
        }
    }
    if(shift){
        av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
    }
}
189
190 static inline void update_qscale(MpegEncContext *s){
191     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
192     s->qscale= av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
193
194     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
195 }
196 #endif //CONFIG_ENCODERS
197
198 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
199     int i;
200     int end;
201
202     st->scantable= src_scantable;
203
204     for(i=0; i<64; i++){
205         int j;
206         j = src_scantable[i];
207         st->permutated[i] = permutation[j];
208 #ifdef ARCH_POWERPC
209         st->inverse[j] = i;
210 #endif
211     }
212
213     end=-1;
214     for(i=0; i<64; i++){
215         int j;
216         j = st->permutated[i];
217         if(j>end) end=j;
218         st->raster_end[i]= end;
219     }
220 }
221
222 #ifdef CONFIG_ENCODERS
223 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
224     int i;
225
226     if(matrix){
227         put_bits(pb, 1, 1);
228         for(i=0;i<64;i++) {
229             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
230         }
231     }else
232         put_bits(pb, 1, 0);
233 }
234 #endif //CONFIG_ENCODERS
235
/**
 * Scans [p, end) for an MPEG-style start code (00 00 01 xx).
 * *state carries the most recently read bytes across calls, so start
 * codes straddling buffer boundaries are still detected.
 * @return pointer just past the xx byte of the found code, or end if no
 *         code was found; *state is updated either way.
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* first 3 bytes: a code may be completed by bytes already in *state
       from the previous call */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* main scan: inspects the 3 bytes before p (all already consumed)
       and hops forward by as much as the byte values allow */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3;  // p[-1] > 1: can't be part of 00 00 01 for 3 positions
        else if(p[-2]          ) p+= 2;
        else if(p[-3]|(p[-1]-1)) p++;
        else{
            p++;                         // p[-3..-1] == 00 00 01: code found
            break;
        }
    }

    /* reload *state with the last 4 bytes up to the return position
       (the hops above may have pushed p past end) */
    p= FFMIN(p, end)-4;
    *state=  be2me_32(unaligned32(p));

    return p+4;
}
265
/* init common dct for both encoder and decoder */
int DCT_common_init(MpegEncContext *s)
{
    /* install the portable C implementations first; the arch-specific
       init calls below may override them with optimized versions */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS

#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* keep the (possibly arch-optimized) plain quantizer reachable even
       when trellis quantization replaces dct_quantize below */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
326
327 static void copy_picture(Picture *dst, Picture *src){
328     *dst = *src;
329     dst->type= FF_BUFFER_TYPE_COPY;
330 }
331
332 #ifdef CONFIG_ENCODERS
333 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
334     int i;
335
336     dst->pict_type              = src->pict_type;
337     dst->quality                = src->quality;
338     dst->coded_picture_number   = src->coded_picture_number;
339     dst->display_picture_number = src->display_picture_number;
340 //    dst->reference              = src->reference;
341     dst->pts                    = src->pts;
342     dst->interlaced_frame       = src->interlaced_frame;
343     dst->top_field_first        = src->top_field_first;
344
345     if(s->avctx->me_threshold){
346         if(!src->motion_val[0])
347             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
348         if(!src->mb_type)
349             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
350         if(!src->ref_index[0])
351             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
352         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
353             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
354             src->motion_subsample_log2, dst->motion_subsample_log2);
355
356         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
357
358         for(i=0; i<2; i++){
359             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
360             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
361
362             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
363                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
364             }
365             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
366                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
367             }
368         }
369     }
370 }
371 #endif
372
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 * @param shared nonzero means pic->data[] is provided by the caller and
 *               must not be allocated here
 * @return 0 on success, -1 on failure; partially allocated side data is
 *         released later through free_picture()
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;

    if(shared){
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* only one linesize per context is supported; the application
           must not change it between buffers */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* side data tables, allocated only the first time this Picture is used */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* h264 stores motion at 4x4 block granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* everything else needs at most 8x8 granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
457
458 /**
459  * deallocates a picture
460  */
461 static void free_picture(MpegEncContext *s, Picture *pic){
462     int i;
463
464     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
465         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
466     }
467
468     av_freep(&pic->mb_var);
469     av_freep(&pic->mc_mb_var);
470     av_freep(&pic->mb_mean);
471     av_freep(&pic->mbskip_table);
472     av_freep(&pic->qscale_table);
473     av_freep(&pic->mb_type_base);
474     av_freep(&pic->dct_coeff);
475     av_freep(&pic->pan_scan);
476     pic->mb_type= NULL;
477     for(i=0; i<2; i++){
478         av_freep(&pic->motion_val_base[i]);
479         av_freep(&pic->ref_index[i]);
480     }
481
482     if(pic->type == FF_BUFFER_TYPE_SHARED){
483         for(i=0; i<4; i++){
484             pic->base[i]=
485             pic->data[i]= NULL;
486         }
487         pic->type= 0;
488     }
489 }
490
/**
 * allocates the per-thread (slice context) buffers of an MpegEncContext.
 * @param base currently unused — NOTE(review): confirm before removing
 * @return 0 on success, -1 on failure; partial allocations are freed
 *         later through MPV_common_end()
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* rd/b/obmc scratchpads deliberately alias the me scratchpad; they
       are never needed at the same time */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    /* pblocks[i] points at block i inside this context's own block array */
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
520
521 static void free_duplicate_context(MpegEncContext *s){
522     if(s==NULL) return;
523
524     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
525     av_freep(&s->me.scratchpad);
526     s->rd_scratchpad=
527     s->b_scratchpad=
528     s->obmc_scratchpad= NULL;
529
530     av_freep(&s->dct_error_sum);
531     av_freep(&s->me.map);
532     av_freep(&s->me.score_map);
533     av_freep(&s->blocks);
534     s->block= NULL;
535 }
536
537 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
538 #define COPY(a) bak->a= src->a
539     COPY(allocated_edge_emu_buffer);
540     COPY(edge_emu_buffer);
541     COPY(me.scratchpad);
542     COPY(rd_scratchpad);
543     COPY(b_scratchpad);
544     COPY(obmc_scratchpad);
545     COPY(me.map);
546     COPY(me.score_map);
547     COPY(blocks);
548     COPY(block);
549     COPY(start_mb_y);
550     COPY(end_mb_y);
551     COPY(me.map_generation);
552     COPY(pb);
553     COPY(dct_error_sum);
554     COPY(dct_count[0]);
555     COPY(dct_count[1]);
556 #undef COPY
557 }
558
/**
 * copies src into dst while preserving dst's thread-local state:
 * dst's own buffers are saved, the whole struct is overwritten with src,
 * then the saved buffers are restored on top of the copy.
 */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    /* pblocks must point into dst's own block array, not src's */
    for(i=0;i<12;i++){
        dst->pblocks[i] = (short *)(&dst->block[i]);
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
572
573 #ifdef CONFIG_ENCODERS
574 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
575 #define COPY(a) dst->a= src->a
576     COPY(pict_type);
577     COPY(current_picture);
578     COPY(f_code);
579     COPY(b_code);
580     COPY(qscale);
581     COPY(lambda);
582     COPY(lambda2);
583     COPY(picture_in_gop_number);
584     COPY(gop_picture_number);
585     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
586     COPY(progressive_frame); //FIXME don't set in encode_header
587     COPY(partitioned_frame); //FIXME don't set in encode_header
588 #undef COPY
589 }
590 #endif
591
592 /**
593  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
594  * the changed fields will not depend upon the prior state of the MpegEncContext.
595  */
596 static void MPV_common_defaults(MpegEncContext *s){
597     s->y_dc_scale_table=
598     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
599     s->chroma_qscale_table= ff_default_chroma_qscale_table;
600     s->progressive_frame= 1;
601     s->progressive_sequence= 1;
602     s->picture_structure= PICT_FRAME;
603
604     s->coded_picture_number = 0;
605     s->picture_number = 0;
606     s->input_picture_number = 0;
607
608     s->picture_in_gop_number = 0;
609
610     s->f_code = 1;
611     s->b_code = 1;
612 }
613
/**
 * sets the given MpegEncContext to defaults for decoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 */
void MPV_decode_defaults(MpegEncContext *s){
    // decoding currently needs no defaults beyond the common ones
    MPV_common_defaults(s);
}
621
622 /**
623  * sets the given MpegEncContext to defaults for encoding.
624  * the changed fields will not depend upon the prior state of the MpegEncContext.
625  */
626
627 #ifdef CONFIG_ENCODERS
628 static void MPV_encode_defaults(MpegEncContext *s){
629     int i;
630     MPV_common_defaults(s);
631
632     for(i=-16; i<16; i++){
633         default_fcode_tab[i + MAX_MV]= 1;
634     }
635     s->me.mv_penalty= default_mv_penalty;
636     s->fcode_tab= default_fcode_tab;
637 }
638 #endif //CONFIG_ENCODERS
639
640 /**
641  * init common structure for both encoder and decoder.
642  * this assumes that some variables like width/height are already set
643  */
644 int MPV_common_init(MpegEncContext *s)
645 {
646     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
647
648     s->mb_height = (s->height + 15) / 16;
649
650     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
651         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
652         return -1;
653     }
654
655     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
656         return -1;
657
658     dsputil_init(&s->dsp, s->avctx);
659     DCT_common_init(s);
660
661     s->flags= s->avctx->flags;
662     s->flags2= s->avctx->flags2;
663
664     s->mb_width  = (s->width  + 15) / 16;
665     s->mb_stride = s->mb_width + 1;
666     s->b8_stride = s->mb_width*2 + 1;
667     s->b4_stride = s->mb_width*4 + 1;
668     mb_array_size= s->mb_height * s->mb_stride;
669     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
670
671     /* set chroma shifts */
672     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
673                                                     &(s->chroma_y_shift) );
674
675     /* set default edge pos, will be overriden in decode_header if needed */
676     s->h_edge_pos= s->mb_width*16;
677     s->v_edge_pos= s->mb_height*16;
678
679     s->mb_num = s->mb_width * s->mb_height;
680
681     s->block_wrap[0]=
682     s->block_wrap[1]=
683     s->block_wrap[2]=
684     s->block_wrap[3]= s->b8_stride;
685     s->block_wrap[4]=
686     s->block_wrap[5]= s->mb_stride;
687
688     y_size = s->b8_stride * (2 * s->mb_height + 1);
689     c_size = s->mb_stride * (s->mb_height + 1);
690     yc_size = y_size + 2 * c_size;
691
692     /* convert fourcc to upper case */
693     s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
694                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
695                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
696                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
697
698     s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
699                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
700                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
701                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
702
703     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
704
705     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
706     for(y=0; y<s->mb_height; y++){
707         for(x=0; x<s->mb_width; x++){
708             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
709         }
710     }
711     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
712
713     if (s->encoding) {
714         /* Allocate MV tables */
715         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
716         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
717         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
718         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
719         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
720         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
721         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
722         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
723         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
724         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
725         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
726         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
727
728         if(s->msmpeg4_version){
729             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
730         }
731         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
732
733         /* Allocate MB type table */
734         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
735
736         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
737
738         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
739         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
740         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
741         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
742         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
743         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
744
745         if(s->avctx->noise_reduction){
746             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
747         }
748     }
749     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
750
751     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
752
753     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
754         /* interlaced direct mode decoding tables */
755             for(i=0; i<2; i++){
756                 int j, k;
757                 for(j=0; j<2; j++){
758                     for(k=0; k<2; k++){
759                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
760                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
761                     }
762                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
763                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
764                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
765                 }
766                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
767             }
768     }
769     if (s->out_format == FMT_H263) {
770         /* ac values */
771         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
772         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
773         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
774         s->ac_val[2] = s->ac_val[1] + c_size;
775
776         /* cbp values */
777         CHECKED_ALLOCZ(s->coded_block_base, y_size);
778         s->coded_block= s->coded_block_base + s->b8_stride + 1;
779
780         /* cbp, ac_pred, pred_dir */
781         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
782         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
783     }
784
785     if (s->h263_pred || s->h263_plus || !s->encoding) {
786         /* dc values */
787         //MN: we need these for error resilience of intra-frames
788         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
789         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
790         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
791         s->dc_val[2] = s->dc_val[1] + c_size;
792         for(i=0;i<yc_size;i++)
793             s->dc_val_base[i] = 1024;
794     }
795
796     /* which mb is a intra block */
797     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
798     memset(s->mbintra_table, 1, mb_array_size);
799
800     /* init macroblock skip table */
801     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
802     //Note the +1 is for a quicker mpeg4 slice_end detection
803     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
804
805     s->parse_context.state= -1;
806     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
807        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
808        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
809        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
810     }
811
812     s->context_initialized = 1;
813
814     s->thread_context[0]= s;
815     for(i=1; i<s->avctx->thread_count; i++){
816         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
817         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
818     }
819
820     for(i=0; i<s->avctx->thread_count; i++){
821         if(init_duplicate_context(s->thread_context[i], s) < 0)
822            goto fail;
823         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
824         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
825     }
826
827     return 0;
828  fail:
829     MPV_common_end(s);
830     return -1;
831 }
832
/* free everything allocated by MPV_common_init(); shared by encoder and decoder */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* tear down slice-threading contexts; context 0 is s itself and
       must not be freed, so the second loop starts at 1 */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* motion-vector tables: only the *_base pointers own memory, the
       non-base pointers are offsets into them and are just cleared */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* interlaced (field-based) motion-vector and select tables */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    /* DC/AC prediction and coded-block state (H.263/MPEG-4 style) */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* release every per-picture buffer before freeing the picture array */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    /* clear dangling references into the freed picture array */
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
916
917 #ifdef CONFIG_ENCODERS
918
919 /* init video encoder */
920 int MPV_encode_init(AVCodecContext *avctx)
921 {
922     MpegEncContext *s = avctx->priv_data;
923     int i;
924     int chroma_h_shift, chroma_v_shift;
925
926     MPV_encode_defaults(s);
927
928     switch (avctx->codec_id) {
929     case CODEC_ID_MPEG2VIDEO:
930         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
931             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
932             return -1;
933         }
934         break;
935     case CODEC_ID_LJPEG:
936     case CODEC_ID_MJPEG:
937         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
938            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
939             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
940             return -1;
941         }
942         break;
943     default:
944         if(avctx->pix_fmt != PIX_FMT_YUV420P){
945             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
946             return -1;
947         }
948     }
949
950     switch (avctx->pix_fmt) {
951     case PIX_FMT_YUVJ422P:
952     case PIX_FMT_YUV422P:
953         s->chroma_format = CHROMA_422;
954         break;
955     case PIX_FMT_YUVJ420P:
956     case PIX_FMT_YUV420P:
957     default:
958         s->chroma_format = CHROMA_420;
959         break;
960     }
961
962     s->bit_rate = avctx->bit_rate;
963     s->width = avctx->width;
964     s->height = avctx->height;
965     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
966         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
967         avctx->gop_size=600;
968     }
969     s->gop_size = avctx->gop_size;
970     s->avctx = avctx;
971     s->flags= avctx->flags;
972     s->flags2= avctx->flags2;
973     s->max_b_frames= avctx->max_b_frames;
974     s->codec_id= avctx->codec->id;
975     s->luma_elim_threshold  = avctx->luma_elim_threshold;
976     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
977     s->strict_std_compliance= avctx->strict_std_compliance;
978     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
979     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
980     s->mpeg_quant= avctx->mpeg_quant;
981     s->rtp_mode= !!avctx->rtp_payload_size;
982     s->intra_dc_precision= avctx->intra_dc_precision;
983     s->user_specified_pts = AV_NOPTS_VALUE;
984
985     if (s->gop_size <= 1) {
986         s->intra_only = 1;
987         s->gop_size = 12;
988     } else {
989         s->intra_only = 0;
990     }
991
992     s->me_method = avctx->me_method;
993
994     /* Fixed QSCALE */
995     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
996
997     s->adaptive_quant= (   s->avctx->lumi_masking
998                         || s->avctx->dark_masking
999                         || s->avctx->temporal_cplx_masking
1000                         || s->avctx->spatial_cplx_masking
1001                         || s->avctx->p_masking
1002                         || s->avctx->border_masking
1003                         || (s->flags&CODEC_FLAG_QP_RD))
1004                        && !s->fixed_qscale;
1005
1006     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1007     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1008     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1009     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1010     s->q_scale_type= !!(s->flags2 & CODEC_FLAG2_NON_LINEAR_QUANT);
1011
1012     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1013         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
1014         return -1;
1015     }
1016
1017     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1018         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1019     }
1020
1021     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1022         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1023         return -1;
1024     }
1025
1026     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1027         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1028         return -1;
1029     }
1030
1031     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1032        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1033        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1034
1035         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1036     }
1037
1038     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1039        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1040         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1041         return -1;
1042     }
1043
1044     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1045         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1046         return -1;
1047     }
1048
1049     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1050         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1051         return -1;
1052     }
1053
1054     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1055         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1056         return -1;
1057     }
1058
1059     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1060         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1061         return -1;
1062     }
1063
1064     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1065         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1066         return -1;
1067     }
1068
1069     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1070        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1071         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1072         return -1;
1073     }
1074
1075     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1076         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1077         return -1;
1078     }
1079
1080     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1081         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1082         return -1;
1083     }
1084
1085     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1086         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1087         return -1;
1088     }
1089
1090     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1091         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet, set threshold to 1000000000\n");
1092         return -1;
1093     }
1094
1095     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1096         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1097         return -1;
1098     }
1099
1100     if(s->flags & CODEC_FLAG_LOW_DELAY){
1101         if (s->codec_id != CODEC_ID_MPEG2VIDEO && s->codec_id != CODEC_ID_MPEG1VIDEO){
1102             av_log(avctx, AV_LOG_ERROR, "low delay forcing is only available for mpeg1/2\n");
1103             return -1;
1104         }
1105         if (s->max_b_frames != 0){
1106             av_log(avctx, AV_LOG_ERROR, "b frames cannot be used with low delay\n");
1107             return -1;
1108         }
1109     }
1110
1111     if(s->q_scale_type == 1){
1112         if(s->codec_id != CODEC_ID_MPEG2VIDEO){
1113             av_log(avctx, AV_LOG_ERROR, "non linear quant is only available for mpeg2\n");
1114             return -1;
1115         }
1116         if(avctx->qmax > 12){
1117             av_log(avctx, AV_LOG_ERROR, "non linear quant only supports qmax <= 12 currently\n");
1118             return -1;
1119         }
1120     }
1121
1122     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1123        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1124        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1125         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1126         return -1;
1127     }
1128
1129     if(s->avctx->thread_count > 1)
1130         s->rtp_mode= 1;
1131
1132     if(!avctx->time_base.den || !avctx->time_base.num){
1133         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1134         return -1;
1135     }
1136
1137     i= (INT_MAX/2+128)>>8;
1138     if(avctx->me_threshold >= i){
1139         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1140         return -1;
1141     }
1142     if(avctx->mb_threshold >= i){
1143         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1144         return -1;
1145     }
1146
1147     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1148         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1149         avctx->b_frame_strategy = 0;
1150     }
1151
1152     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1153     if(i > 1){
1154         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1155         avctx->time_base.den /= i;
1156         avctx->time_base.num /= i;
1157 //        return -1;
1158     }
1159
1160     if(s->codec_id==CODEC_ID_MJPEG){
1161         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1162         s->inter_quant_bias= 0;
1163     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1164         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1165         s->inter_quant_bias= 0;
1166     }else{
1167         s->intra_quant_bias=0;
1168         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1169     }
1170
1171     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1172         s->intra_quant_bias= avctx->intra_quant_bias;
1173     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1174         s->inter_quant_bias= avctx->inter_quant_bias;
1175
1176     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1177
1178     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1179         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1180         return -1;
1181     }
1182     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1183
1184     switch(avctx->codec->id) {
1185     case CODEC_ID_MPEG1VIDEO:
1186         s->out_format = FMT_MPEG1;
1187         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1188         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1189         break;
1190     case CODEC_ID_MPEG2VIDEO:
1191         s->out_format = FMT_MPEG1;
1192         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1193         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1194         s->rtp_mode= 1;
1195         break;
1196     case CODEC_ID_LJPEG:
1197     case CODEC_ID_MJPEG:
1198         s->out_format = FMT_MJPEG;
1199         s->intra_only = 1; /* force intra only for jpeg */
1200         s->mjpeg_write_tables = 1;
1201         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1202         s->mjpeg_vsample[0] = 2;
1203         s->mjpeg_vsample[1] = 2>>chroma_v_shift;
1204         s->mjpeg_vsample[2] = 2>>chroma_v_shift;
1205         s->mjpeg_hsample[0] = 2;
1206         s->mjpeg_hsample[1] = 2>>chroma_h_shift;
1207         s->mjpeg_hsample[2] = 2>>chroma_h_shift;
1208         if (!(ENABLE_MJPEG_ENCODER || ENABLE_LJPEG_ENCODER) || mjpeg_init(s) < 0)
1209             return -1;
1210         avctx->delay=0;
1211         s->low_delay=1;
1212         break;
1213 #ifdef CONFIG_H261_ENCODER
1214     case CODEC_ID_H261:
1215         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
1216             av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.261 codec.\nValid sizes are 176x144, 352x288\n", s->width, s->height);
1217             return -1;
1218         }
1219         s->out_format = FMT_H261;
1220         avctx->delay=0;
1221         s->low_delay=1;
1222         break;
1223 #endif
1224     case CODEC_ID_H263:
1225         if (h263_get_picture_format(s->width, s->height) == 7) {
1226             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1227             return -1;
1228         }
1229         s->out_format = FMT_H263;
1230         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1231         avctx->delay=0;
1232         s->low_delay=1;
1233         break;
1234     case CODEC_ID_H263P:
1235         s->out_format = FMT_H263;
1236         s->h263_plus = 1;
1237         /* Fx */
1238         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1239         s->h263_aic= (avctx->flags & CODEC_FLAG_AC_PRED) ? 1:0;
1240         s->modified_quant= s->h263_aic;
1241         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1242         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1243         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1244         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1245         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1246
1247         /* /Fx */
1248         /* These are just to be sure */
1249         avctx->delay=0;
1250         s->low_delay=1;
1251         break;
1252     case CODEC_ID_FLV1:
1253         s->out_format = FMT_H263;
1254         s->h263_flv = 2; /* format = 1; 11-bit codes */
1255         s->unrestricted_mv = 1;
1256         s->rtp_mode=0; /* don't allow GOB */
1257         avctx->delay=0;
1258         s->low_delay=1;
1259         break;
1260     case CODEC_ID_RV10:
1261         s->out_format = FMT_H263;
1262         avctx->delay=0;
1263         s->low_delay=1;
1264         break;
1265     case CODEC_ID_RV20:
1266         s->out_format = FMT_H263;
1267         avctx->delay=0;
1268         s->low_delay=1;
1269         s->modified_quant=1;
1270         s->h263_aic=1;
1271         s->h263_plus=1;
1272         s->loop_filter=1;
1273         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1274         break;
1275     case CODEC_ID_MPEG4:
1276         s->out_format = FMT_H263;
1277         s->h263_pred = 1;
1278         s->unrestricted_mv = 1;
1279         s->low_delay= s->max_b_frames ? 0 : 1;
1280         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1281         break;
1282     case CODEC_ID_MSMPEG4V1:
1283         s->out_format = FMT_H263;
1284         s->h263_msmpeg4 = 1;
1285         s->h263_pred = 1;
1286         s->unrestricted_mv = 1;
1287         s->msmpeg4_version= 1;
1288         avctx->delay=0;
1289         s->low_delay=1;
1290         break;
1291     case CODEC_ID_MSMPEG4V2:
1292         s->out_format = FMT_H263;
1293         s->h263_msmpeg4 = 1;
1294         s->h263_pred = 1;
1295         s->unrestricted_mv = 1;
1296         s->msmpeg4_version= 2;
1297         avctx->delay=0;
1298         s->low_delay=1;
1299         break;
1300     case CODEC_ID_MSMPEG4V3:
1301         s->out_format = FMT_H263;
1302         s->h263_msmpeg4 = 1;
1303         s->h263_pred = 1;
1304         s->unrestricted_mv = 1;
1305         s->msmpeg4_version= 3;
1306         s->flipflop_rounding=1;
1307         avctx->delay=0;
1308         s->low_delay=1;
1309         break;
1310     case CODEC_ID_WMV1:
1311         s->out_format = FMT_H263;
1312         s->h263_msmpeg4 = 1;
1313         s->h263_pred = 1;
1314         s->unrestricted_mv = 1;
1315         s->msmpeg4_version= 4;
1316         s->flipflop_rounding=1;
1317         avctx->delay=0;
1318         s->low_delay=1;
1319         break;
1320     case CODEC_ID_WMV2:
1321         s->out_format = FMT_H263;
1322         s->h263_msmpeg4 = 1;
1323         s->h263_pred = 1;
1324         s->unrestricted_mv = 1;
1325         s->msmpeg4_version= 5;
1326         s->flipflop_rounding=1;
1327         avctx->delay=0;
1328         s->low_delay=1;
1329         break;
1330     default:
1331         return -1;
1332     }
1333
1334     avctx->has_b_frames= !s->low_delay;
1335
1336     s->encoding = 1;
1337
1338     /* init */
1339     if (MPV_common_init(s) < 0)
1340         return -1;
1341
1342     if(s->modified_quant)
1343         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1344     s->progressive_frame=
1345     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN));
1346     s->quant_precision=5;
1347
1348     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1349     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1350
1351 #ifdef CONFIG_H261_ENCODER
1352     if (s->out_format == FMT_H261)
1353         ff_h261_encode_init(s);
1354 #endif
1355     if (s->out_format == FMT_H263)
1356         h263_encode_init(s);
1357     if (ENABLE_MSMPEG4_ENCODER && s->msmpeg4_version)
1358         ff_msmpeg4_encode_init(s);
1359     if (s->out_format == FMT_MPEG1)
1360         ff_mpeg1_encode_init(s);
1361
1362     /* init q matrix */
1363     for(i=0;i<64;i++) {
1364         int j= s->dsp.idct_permutation[i];
1365         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1366             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1367             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1368         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1369             s->intra_matrix[j] =
1370             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1371         }else
1372         { /* mpeg1/2 */
1373             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1374             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1375         }
1376         if(s->avctx->intra_matrix)
1377             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1378         if(s->avctx->inter_matrix)
1379             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1380     }
1381
1382     /* precompute matrix */
1383     /* for mjpeg, we do include qscale in the matrix */
1384     if (s->out_format != FMT_MJPEG) {
1385         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1386                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1387         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1388                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1389     }
1390
1391     if(ff_rate_control_init(s) < 0)
1392         return -1;
1393
1394     return 0;
1395 }
1396
/* Shut down the video encoder and free all encoder-owned resources.
   Counterpart of MPV_encode_init(); always returns 0. */
int MPV_encode_end(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;

    ff_rate_control_uninit(s);

    /* free the common (encoder+decoder) state first, then the
       MJPEG-specific tables if an (L)JPEG encoder was initialized */
    MPV_common_end(s);
    if ((ENABLE_MJPEG_ENCODER || ENABLE_LJPEG_ENCODER) && s->out_format == FMT_MJPEG)
        mjpeg_close(s);

    av_freep(&avctx->extradata);

    return 0;
}
1411
1412 #endif //CONFIG_ENCODERS
1413
1414 void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
1415 {
1416     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1417     uint8_t index_run[MAX_RUN+1];
1418     int last, run, level, start, end, i;
1419
1420     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1421     if(static_store && rl->max_level[0])
1422         return;
1423
1424     /* compute max_level[], max_run[] and index_run[] */
1425     for(last=0;last<2;last++) {
1426         if (last == 0) {
1427             start = 0;
1428             end = rl->last;
1429         } else {
1430             start = rl->last;
1431             end = rl->n;
1432         }
1433
1434         memset(max_level, 0, MAX_RUN + 1);
1435         memset(max_run, 0, MAX_LEVEL + 1);
1436         memset(index_run, rl->n, MAX_RUN + 1);
1437         for(i=start;i<end;i++) {
1438             run = rl->table_run[i];
1439             level = rl->table_level[i];
1440             if (index_run[run] == rl->n)
1441                 index_run[run] = i;
1442             if (level > max_level[run])
1443                 max_level[run] = level;
1444             if (run > max_run[level])
1445                 max_run[level] = run;
1446         }
1447         if(static_store)
1448             rl->max_level[last] = static_store[last];
1449         else
1450             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1451         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1452         if(static_store)
1453             rl->max_run[last] = static_store[last] + MAX_RUN + 1;
1454         else
1455             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1456         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1457         if(static_store)
1458             rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
1459         else
1460             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1461         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1462     }
1463 }
1464
/* Replicate the border pixels of a width x height image (line stride
   'wrap') into a surrounding edge of w pixels on every side. */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *row, *bottom;
    int k;

    bottom = buf + (height - 1) * wrap;

    /* replicate the first row upward and the last row downward */
    for (k = 1; k <= w; k++) {
        memcpy(buf - k * wrap, buf, width);
        memcpy(bottom + k * wrap, bottom, width);
    }

    /* replicate the first and last column of every row sideways */
    row = buf;
    for (k = 0; k < height; k++) {
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* fill the four w x w corner areas from the corner pixels */
    for (k = 1; k <= w; k++) {
        memset(buf - k * wrap - w, buf[0], w);                    /* top left */
        memset(buf - k * wrap + width, buf[width - 1], w);        /* top right */
        memset(bottom + k * wrap - w, bottom[0], w);              /* bottom left */
        memset(bottom + k * wrap + width, bottom[width - 1], w);  /* bottom right */
    }
}
1493
1494 int ff_find_unused_picture(MpegEncContext *s, int shared){
1495     int i;
1496
1497     if(shared){
1498         for(i=0; i<MAX_PICTURE_COUNT; i++){
1499             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1500         }
1501     }else{
1502         for(i=0; i<MAX_PICTURE_COUNT; i++){
1503             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1504         }
1505         for(i=0; i<MAX_PICTURE_COUNT; i++){
1506             if(s->picture[i].data[0]==NULL) return i;
1507         }
1508     }
1509
1510     assert(0);
1511     return -1;
1512 }
1513
1514 static void update_noise_reduction(MpegEncContext *s){
1515     int intra, i;
1516
1517     for(intra=0; intra<2; intra++){
1518         if(s->dct_count[intra] > (1<<16)){
1519             for(i=0; i<64; i++){
1520                 s->dct_error_sum[intra][i] >>=1;
1521             }
1522             s->dct_count[intra] >>= 1;
1523         }
1524
1525         for(i=0; i<64; i++){
1526             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1527         }
1528     }
1529 }
1530
1531 /**
1532  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1533  */
1534 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1535 {
1536     int i;
1537     AVFrame *pic;
1538     s->mb_skipped = 0;
1539
1540     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1541
1542     /* mark&release old frames */
1543     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1544       if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1545         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1546
1547         /* release forgotten pictures */
1548         /* if(mpeg124/h263) */
1549         if(!s->encoding){
1550             for(i=0; i<MAX_PICTURE_COUNT; i++){
1551                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1552                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1553                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1554                 }
1555             }
1556         }
1557       }
1558     }
1559 alloc:
1560     if(!s->encoding){
1561         /* release non reference frames */
1562         for(i=0; i<MAX_PICTURE_COUNT; i++){
1563             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1564                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1565             }
1566         }
1567
1568         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1569             pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1570         else{
1571             i= ff_find_unused_picture(s, 0);
1572             pic= (AVFrame*)&s->picture[i];
1573         }
1574
1575         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1576                         && !s->dropable ? 3 : 0;
1577
1578         pic->coded_picture_number= s->coded_picture_number++;
1579
1580         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1581             return -1;
1582
1583         s->current_picture_ptr= (Picture*)pic;
1584         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1585         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1586     }
1587
1588     s->current_picture_ptr->pict_type= s->pict_type;
1589 //    if(s->flags && CODEC_FLAG_QSCALE)
1590   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1591     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1592
1593     copy_picture(&s->current_picture, s->current_picture_ptr);
1594
1595     if (s->pict_type != B_TYPE) {
1596         s->last_picture_ptr= s->next_picture_ptr;
1597         if(!s->dropable)
1598             s->next_picture_ptr= s->current_picture_ptr;
1599     }
1600 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1601         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1602         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1603         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1604         s->pict_type, s->dropable);*/
1605
1606     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1607     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1608
1609     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && !s->dropable){
1610         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1611         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1612         goto alloc;
1613     }
1614
1615     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1616
1617     if(s->picture_structure!=PICT_FRAME){
1618         int i;
1619         for(i=0; i<4; i++){
1620             if(s->picture_structure == PICT_BOTTOM_FIELD){
1621                  s->current_picture.data[i] += s->current_picture.linesize[i];
1622             }
1623             s->current_picture.linesize[i] *= 2;
1624             s->last_picture.linesize[i] *=2;
1625             s->next_picture.linesize[i] *=2;
1626         }
1627     }
1628
1629     s->hurry_up= s->avctx->hurry_up;
1630     s->error_resilience= avctx->error_resilience;
1631
1632     /* set dequantizer, we can't do it during init as it might change for mpeg4
1633        and we can't do it in the header decode as init isnt called for mpeg4 there yet */
1634     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1635         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1636         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1637     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1638         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1639         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1640     }else{
1641         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1642         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1643     }
1644
1645     if(s->dct_error_sum){
1646         assert(s->avctx->noise_reduction && s->encoding);
1647
1648         update_noise_reduction(s);
1649     }
1650
1651 #ifdef HAVE_XVMC
1652     if(s->avctx->xvmc_acceleration)
1653         return XVMC_field_start(s, avctx);
1654 #endif
1655     return 0;
1656 }
1657
/**
 * Generic post-frame function for encode/decode, called after a frame has
 * been coded/decoded. Pads the edges of reference frames, releases frames
 * the encoder no longer needs and publishes the coded frame on the context.
 */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    /* pad reference frames so motion vectors may point outside the picture */
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c();

    /* remember type/lambda of this frame for rate control of the next one */
    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1708
1709 /**
1710  * draws an line from (ex, ey) -> (sx, sy).
1711  * @param w width of the image
1712  * @param h height of the image
1713  * @param stride stride/linesize of the image
1714  * @param color color of the arrow
1715  */
1716 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1717     int x, y, fr, f;
1718
1719     sx= av_clip(sx, 0, w-1);
1720     sy= av_clip(sy, 0, h-1);
1721     ex= av_clip(ex, 0, w-1);
1722     ey= av_clip(ey, 0, h-1);
1723
1724     buf[sy*stride + sx]+= color;
1725
1726     if(FFABS(ex - sx) > FFABS(ey - sy)){
1727         if(sx > ex){
1728             FFSWAP(int, sx, ex);
1729             FFSWAP(int, sy, ey);
1730         }
1731         buf+= sx + sy*stride;
1732         ex-= sx;
1733         f= ((ey-sy)<<16)/ex;
1734         for(x= 0; x <= ex; x++){
1735             y = (x*f)>>16;
1736             fr= (x*f)&0xFFFF;
1737             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1738             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1739         }
1740     }else{
1741         if(sy > ey){
1742             FFSWAP(int, sx, ex);
1743             FFSWAP(int, sy, ey);
1744         }
1745         buf+= sx + sy*stride;
1746         ey-= sy;
1747         if(ey) f= ((ex-sx)<<16)/ey;
1748         else   f= 0;
1749         for(y= 0; y <= ey; y++){
1750             x = (y*f)>>16;
1751             fr= (y*f)&0xFFFF;
1752             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1753             buf[y*stride + x+1]+= (color*         fr )>>16;;
1754         }
1755     }
1756 }
1757
1758 /**
1759  * draws an arrow from (ex, ey) -> (sx, sy).
1760  * @param w width of the image
1761  * @param h height of the image
1762  * @param stride stride/linesize of the image
1763  * @param color color of the arrow
1764  */
1765 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1766     int dx,dy;
1767
1768     sx= av_clip(sx, -100, w+100);
1769     sy= av_clip(sy, -100, h+100);
1770     ex= av_clip(ex, -100, w+100);
1771     ey= av_clip(ey, -100, h+100);
1772
1773     dx= ex - sx;
1774     dy= ey - sy;
1775
1776     if(dx*dx + dy*dy > 3*3){
1777         int rx=  dx + dy;
1778         int ry= -dx + dy;
1779         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1780
1781         //FIXME subpixel accuracy
1782         rx= ROUNDED_DIV(rx*3<<4, length);
1783         ry= ROUNDED_DIV(ry*3<<4, length);
1784
1785         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1786         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1787     }
1788     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1789 }
1790
1791 /**
1792  * prints debuging info for the given picture.
1793  */
1794 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1795
1796     if(!pict || !pict->mb_type) return;
1797
1798     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1799         int x,y;
1800
1801         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1802         switch (pict->pict_type) {
1803             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1804             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1805             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1806             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1807             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1808             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1809         }
1810         for(y=0; y<s->mb_height; y++){
1811             for(x=0; x<s->mb_width; x++){
1812                 if(s->avctx->debug&FF_DEBUG_SKIP){
1813                     int count= s->mbskip_table[x + y*s->mb_stride];
1814                     if(count>9) count=9;
1815                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1816                 }
1817                 if(s->avctx->debug&FF_DEBUG_QP){
1818                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1819                 }
1820                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1821                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1822                     //Type & MV direction
1823                     if(IS_PCM(mb_type))
1824                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1825                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1826                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1827                     else if(IS_INTRA4x4(mb_type))
1828                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1829                     else if(IS_INTRA16x16(mb_type))
1830                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1831                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1832                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1833                     else if(IS_DIRECT(mb_type))
1834                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1835                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1836                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1837                     else if(IS_GMC(mb_type))
1838                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1839                     else if(IS_SKIP(mb_type))
1840                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1841                     else if(!USES_LIST(mb_type, 1))
1842                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1843                     else if(!USES_LIST(mb_type, 0))
1844                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1845                     else{
1846                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1847                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1848                     }
1849
1850                     //segmentation
1851                     if(IS_8X8(mb_type))
1852                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1853                     else if(IS_16X8(mb_type))
1854                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1855                     else if(IS_8X16(mb_type))
1856                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1857                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1858                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1859                     else
1860                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1861
1862
1863                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1864                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1865                     else
1866                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1867                 }
1868 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1869             }
1870             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1871         }
1872     }
1873
1874     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1875         const int shift= 1 + s->quarter_sample;
1876         int mb_y;
1877         uint8_t *ptr;
1878         int i;
1879         int h_chroma_shift, v_chroma_shift;
1880         const int width = s->avctx->width;
1881         const int height= s->avctx->height;
1882         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1883         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1884         s->low_delay=0; //needed to see the vectors without trashing the buffers
1885
1886         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1887         for(i=0; i<3; i++){
1888             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1889             pict->data[i]= s->visualization_buffer[i];
1890         }
1891         pict->type= FF_BUFFER_TYPE_COPY;
1892         ptr= pict->data[0];
1893
1894         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1895             int mb_x;
1896             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1897                 const int mb_index= mb_x + mb_y*s->mb_stride;
1898                 if((s->avctx->debug_mv) && pict->motion_val){
1899                   int type;
1900                   for(type=0; type<3; type++){
1901                     int direction = 0;
1902                     switch (type) {
1903                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1904                                 continue;
1905                               direction = 0;
1906                               break;
1907                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1908                                 continue;
1909                               direction = 0;
1910                               break;
1911                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1912                                 continue;
1913                               direction = 1;
1914                               break;
1915                     }
1916                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1917                         continue;
1918
1919                     if(IS_8X8(pict->mb_type[mb_index])){
1920                       int i;
1921                       for(i=0; i<4; i++){
1922                         int sx= mb_x*16 + 4 + 8*(i&1);
1923                         int sy= mb_y*16 + 4 + 8*(i>>1);
1924                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1925                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1926                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1927                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1928                       }
1929                     }else if(IS_16X8(pict->mb_type[mb_index])){
1930                       int i;
1931                       for(i=0; i<2; i++){
1932                         int sx=mb_x*16 + 8;
1933                         int sy=mb_y*16 + 4 + 8*i;
1934                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1935                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1936                         int my=(pict->motion_val[direction][xy][1]>>shift);
1937
1938                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1939                             my*=2;
1940
1941                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1942                       }
1943                     }else if(IS_8X16(pict->mb_type[mb_index])){
1944                       int i;
1945                       for(i=0; i<2; i++){
1946                         int sx=mb_x*16 + 4 + 8*i;
1947                         int sy=mb_y*16 + 8;
1948                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1949                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1950                         int my=(pict->motion_val[direction][xy][1]>>shift);
1951
1952                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1953                             my*=2;
1954
1955                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1956                       }
1957                     }else{
1958                       int sx= mb_x*16 + 8;
1959                       int sy= mb_y*16 + 8;
1960                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1961                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1962                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1963                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1964                     }
1965                   }
1966                 }
1967                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1968                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1969                     int y;
1970                     for(y=0; y<8; y++){
1971                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1972                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1973                     }
1974                 }
1975                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1976                     int mb_type= pict->mb_type[mb_index];
1977                     uint64_t u,v;
1978                     int y;
1979 #define COLOR(theta, r)\
1980 u= (int)(128 + r*cos(theta*3.141592/180));\
1981 v= (int)(128 + r*sin(theta*3.141592/180));
1982
1983
1984                     u=v=128;
1985                     if(IS_PCM(mb_type)){
1986                         COLOR(120,48)
1987                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1988                         COLOR(30,48)
1989                     }else if(IS_INTRA4x4(mb_type)){
1990                         COLOR(90,48)
1991                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1992 //                        COLOR(120,48)
1993                     }else if(IS_DIRECT(mb_type)){
1994                         COLOR(150,48)
1995                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1996                         COLOR(170,48)
1997                     }else if(IS_GMC(mb_type)){
1998                         COLOR(190,48)
1999                     }else if(IS_SKIP(mb_type)){
2000 //                        COLOR(180,48)
2001                     }else if(!USES_LIST(mb_type, 1)){
2002                         COLOR(240,48)
2003                     }else if(!USES_LIST(mb_type, 0)){
2004                         COLOR(0,48)
2005                     }else{
2006                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
2007                         COLOR(300,48)
2008                     }
2009
2010                     u*= 0x0101010101010101ULL;
2011                     v*= 0x0101010101010101ULL;
2012                     for(y=0; y<8; y++){
2013                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
2014                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
2015                     }
2016
2017                     //segmentation
2018                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
2019                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2020                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
2021                     }
2022                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
2023                         for(y=0; y<16; y++)
2024                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
2025                     }
2026                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
2027                         int dm= 1 << (mv_sample_log2-2);
2028                         for(i=0; i<4; i++){
2029                             int sx= mb_x*16 + 8*(i&1);
2030                             int sy= mb_y*16 + 8*(i>>1);
2031                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
2032                             //FIXME bidir
2033                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
2034                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
2035                                 for(y=0; y<8; y++)
2036                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
2037                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
2038                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
2039                         }
2040                     }
2041
2042                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
2043                         // hmm
2044                     }
2045                 }
2046                 s->mbskip_table[mb_index]=0;
2047             }
2048         }
2049     }
2050 }
2051
2052 #ifdef CONFIG_ENCODERS
2053
2054 static int get_sae(uint8_t *src, int ref, int stride){
2055     int x,y;
2056     int acc=0;
2057
2058     for(y=0; y<16; y++){
2059         for(x=0; x<16; x++){
2060             acc+= FFABS(src[x+y*stride] - ref);
2061         }
2062     }
2063
2064     return acc;
2065 }
2066
2067 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2068     int x, y, w, h;
2069     int acc=0;
2070
2071     w= s->width &~15;
2072     h= s->height&~15;
2073
2074     for(y=0; y<h; y+=16){
2075         for(x=0; x<w; x+=16){
2076             int offset= x + y*stride;
2077             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2078             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2079             int sae = get_sae(src + offset, mean, stride);
2080
2081             acc+= sae + 500 < sad;
2082         }
2083     }
2084     return acc;
2085 }
2086
2087
/**
 * Queues one user-supplied frame (pic_arg) for encoding: validates/guesses
 * its pts, stores its pixels in an internal Picture (by reference when the
 * strides match and input is preserved, otherwise by copy) and appends it to
 * s->input_picture at position encoding_delay (= max_b_frames).
 * A NULL pic_arg signals flushing; it only shifts the queue.
 * @return 0 on success, -1 on invalid (non-monotonic) timestamps
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1;  /* use the user's buffer directly instead of copying */

    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        if(pts != AV_NOPTS_VALUE){
            /* timestamps must be strictly increasing */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;

                if(time <= last){
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            /* no pts given: extrapolate from the last one, or fall back to the frame number */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                s->user_specified_pts=
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
            }else{
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    /* direct use is only possible if the buffer survives until encoding and the strides match */
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        /* alias the caller's planes; no pixel copy */
        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        /* skip the copy if the user buffer already is our buffer (offset by the edge) */
        if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
           && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
           && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
       // empty
        }else{
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            /* copy each plane, row by row when strides differ */
            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i];

                if(!s->avctx->rc_buffer_size)
                    dst +=INPLACE_OFFSET;

                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifiying pic_arg
  }

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
2193
2194 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2195     int x, y, plane;
2196     int score=0;
2197     int64_t score64=0;
2198
2199     for(plane=0; plane<3; plane++){
2200         const int stride= p->linesize[plane];
2201         const int bw= plane ? 1 : 2;
2202         for(y=0; y<s->mb_height*bw; y++){
2203             for(x=0; x<s->mb_width*bw; x++){
2204                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2205                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2206
2207                 switch(s->avctx->frame_skip_exp){
2208                     case 0: score= FFMAX(score, v); break;
2209                     case 1: score+= FFABS(v);break;
2210                     case 2: score+= v*v;break;
2211                     case 3: score64+= FFABS(v*v*(int64_t)v);break;
2212                     case 4: score64+= v*v*(int64_t)(v*v);break;
2213                 }
2214             }
2215         }
2216     }
2217
2218     if(score) score64= score;
2219
2220     if(score64 < s->avctx->frame_skip_threshold)
2221         return 1;
2222     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2223         return 1;
2224     return 0;
2225 }
2226
2227 static int estimate_best_b_count(MpegEncContext *s){
2228     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2229     AVCodecContext *c= avcodec_alloc_context();
2230     AVFrame input[FF_MAX_B_FRAMES+2];
2231     const int scale= s->avctx->brd_scale;
2232     int i, j, out_size, p_lambda, b_lambda, lambda2;
2233     int outbuf_size= s->width * s->height; //FIXME
2234     uint8_t *outbuf= av_malloc(outbuf_size);
2235     int64_t best_rd= INT64_MAX;
2236     int best_b_count= -1;
2237
2238     assert(scale>=0 && scale <=3);
2239
2240 //    emms_c();
2241     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2242     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2243     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2244     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2245
2246     c->width = s->width >> scale;
2247     c->height= s->height>> scale;
2248     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2249     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2250     c->mb_decision= s->avctx->mb_decision;
2251     c->me_cmp= s->avctx->me_cmp;
2252     c->mb_cmp= s->avctx->mb_cmp;
2253     c->me_sub_cmp= s->avctx->me_sub_cmp;
2254     c->pix_fmt = PIX_FMT_YUV420P;
2255     c->time_base= s->avctx->time_base;
2256     c->max_b_frames= s->max_b_frames;
2257
2258     if (avcodec_open(c, codec) < 0)
2259         return -1;
2260
2261     for(i=0; i<s->max_b_frames+2; i++){
2262         int ysize= c->width*c->height;
2263         int csize= (c->width/2)*(c->height/2);
2264         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2265
2266         avcodec_get_frame_defaults(&input[i]);
2267         input[i].data[0]= av_malloc(ysize + 2*csize);
2268         input[i].data[1]= input[i].data[0] + ysize;
2269         input[i].data[2]= input[i].data[1] + csize;
2270         input[i].linesize[0]= c->width;
2271         input[i].linesize[1]=
2272         input[i].linesize[2]= c->width/2;
2273
2274         if(pre_input_ptr && (!i || s->input_picture[i-1])) {
2275             pre_input= *pre_input_ptr;
2276
2277             if(pre_input.type != FF_BUFFER_TYPE_SHARED && i) {
2278                 pre_input.data[0]+=INPLACE_OFFSET;
2279                 pre_input.data[1]+=INPLACE_OFFSET;
2280                 pre_input.data[2]+=INPLACE_OFFSET;
2281             }
2282
2283             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2284             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2285             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2286         }
2287     }
2288
2289     for(j=0; j<s->max_b_frames+1; j++){
2290         int64_t rd=0;
2291
2292         if(!s->input_picture[j])
2293             break;
2294
2295         c->error[0]= c->error[1]= c->error[2]= 0;
2296
2297         input[0].pict_type= I_TYPE;
2298         input[0].quality= 1 * FF_QP2LAMBDA;
2299         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2300 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2301
2302         for(i=0; i<s->max_b_frames+1; i++){
2303             int is_p= i % (j+1) == j || i==s->max_b_frames;
2304
2305             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2306             input[i+1].quality= is_p ? p_lambda : b_lambda;
2307             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2308             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2309         }
2310
2311         /* get the delayed frames */
2312         while(out_size){
2313             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2314             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2315         }
2316
2317         rd += c->error[0] + c->error[1] + c->error[2];
2318
2319         if(rd < best_rd){
2320             best_rd= rd;
2321             best_b_count= j;
2322         }
2323     }
2324
2325     av_freep(&outbuf);
2326     avcodec_close(c);
2327     av_freep(&c);
2328
2329     for(i=0; i<s->max_b_frames+2; i++){
2330         av_freep(&input[i].data[0]);
2331     }
2332
2333     return best_b_count;
2334 }
2335
/**
 * Selects the next picture to be coded: shifts the reorder queue, decides
 * the picture type (I/P/B, possibly skipping the frame entirely) and sets
 * up s->new_picture / s->current_picture accordingly.
 */
static void select_input_picture(MpegEncContext *s){
    int i;

    /* shift the reorder queue down: slot 0 was consumed by the previous call */
    for(i=1; i<MAX_PICTURE_COUNT; i++)
        s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
    s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;

    /* set next picture type & ordering */
    if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
            /* no reference picture available yet (or intra-only coding):
               the next coded picture must be an I frame */
            s->reordered_input_picture[0]= s->input_picture[0];
            s->reordered_input_picture[0]->pict_type= I_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
        }else{
            int b_frames;

            /* optional frame skipping: drop the input frame when it is
               similar enough to the last reference */
            if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
                if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
                //FIXME check that te gop check above is +-1 correct
//av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);

                    /* release / unlink the skipped frame's buffer depending
                       on who owns it */
                    if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
                        for(i=0; i<4; i++)
                            s->input_picture[0]->data[i]= NULL;
                        s->input_picture[0]->type= 0;
                    }else{
                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
                               || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

                        s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
                    }

                    emms_c();
                    /* account the skipped (zero-bit) frame in rate control */
                    ff_vbv_update(s, 0);

                    goto no_output_pic;
                }
            }

            /* 2nd pass: take the picture types recorded in the 1st-pass stats */
            if(s->flags&CODEC_FLAG_PASS2){
                for(i=0; i<s->max_b_frames+1; i++){
                    int pict_num= s->input_picture[0]->display_picture_number + i;

                    if(pict_num >= s->rc_context.num_entries)
                        break;
                    if(!s->input_picture[i]){
                        s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
                        break;
                    }

                    s->input_picture[i]->pict_type=
                        s->rc_context.entry[pict_num].new_pict_type;
                }
            }

            /* decide how many B frames precede the next reference frame */
            if(s->avctx->b_frame_strategy==0){
                /* strategy 0: always use the maximum available */
                b_frames= s->max_b_frames;
                while(b_frames && !s->input_picture[b_frames]) b_frames--;
            }else if(s->avctx->b_frame_strategy==1){
                /* strategy 1: score each candidate by its intra-MB count
                   against the previous frame; stop at the first frame that
                   differs too much */
                for(i=1; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
                        s->input_picture[i]->b_frame_score=
                            get_intra_count(s, s->input_picture[i  ]->data[0],
                                               s->input_picture[i-1]->data[0], s->linesize) + 1;
                    }
                }
                for(i=0; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
                }

                b_frames= FFMAX(0, i-1);

                /* reset scores */
                for(i=0; i<b_frames+1; i++){
                    s->input_picture[i]->b_frame_score=0;
                }
            }else if(s->avctx->b_frame_strategy==2){
                /* strategy 2: trial-encode every B-frame count and keep the
                   cheapest (rate-distortion) one */
                b_frames= estimate_best_b_count(s);
            }else{
                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
                b_frames=0;
            }

            emms_c();
//static int b_count=0;
//b_count+= b_frames;
//av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);

            /* a frame with a forced non-B type ends the B-frame run early */
            for(i= b_frames - 1; i>=0; i--){
                int type= s->input_picture[i]->pict_type;
                if(type && type != B_TYPE)
                    b_frames= i;
            }
            if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
                av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
            }

            /* GOP boundary: force an I frame, shortening or dropping the
               B-frame run for strict / closed GOPs */
            if(s->picture_in_gop_number + b_frames >= s->gop_size){
              if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
                    b_frames= s->gop_size - s->picture_in_gop_number - 1;
              }else{
                if(s->flags & CODEC_FLAG_CLOSED_GOP)
                    b_frames=0;
                s->input_picture[b_frames]->pict_type= I_TYPE;
              }
            }

            /* closed GOP: a B frame may not reference across an I frame */
            if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
               && b_frames
               && s->input_picture[b_frames]->pict_type== I_TYPE)
                b_frames--;

            /* emit the reference frame first (coded order), then its B frames */
            s->reordered_input_picture[0]= s->input_picture[b_frames];
            if(s->reordered_input_picture[0]->pict_type != I_TYPE)
                s->reordered_input_picture[0]->pict_type= P_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
            for(i=0; i<b_frames; i++){
                s->reordered_input_picture[i+1]= s->input_picture[i];
                s->reordered_input_picture[i+1]->pict_type= B_TYPE;
                s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
            }
        }
    }
no_output_pic:
    if(s->reordered_input_picture[0]){
        /* only non-B pictures are used as references */
        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;

        copy_picture(&s->new_picture, s->reordered_input_picture[0]);

        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size){
            // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable

            int i= ff_find_unused_picture(s, 0);
            Picture *pic= &s->picture[i];

            pic->reference              = s->reordered_input_picture[0]->reference;
            alloc_picture(s, pic, 0);

            /* mark us unused / free shared pic */
            if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_INTERNAL)
                s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
            for(i=0; i<4; i++)
                s->reordered_input_picture[0]->data[i]= NULL;
            s->reordered_input_picture[0]->type= 0;

            copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);

            s->current_picture_ptr= pic;
        }else{
            // input is not a shared pix -> reuse buffer for current_pix

            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
                   || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

            s->current_picture_ptr= s->reordered_input_picture[0];
            for(i=0; i<4; i++){
                s->new_picture.data[i]+= INPLACE_OFFSET;
            }
        }
        copy_picture(&s->current_picture, s->current_picture_ptr);

        s->picture_number= s->new_picture.display_picture_number;
//printf("dpn:%d\n", s->picture_number);
    }else{
       /* nothing to code this call (buffering B frames or frame skipped) */
       memset(&s->new_picture, 0, sizeof(Picture));
    }
}
2503
/**
 * Encodes one video frame.
 * @param avctx    codec context
 * @param buf      output buffer for the coded bitstream
 * @param buf_size size of buf in bytes
 * @param data     input AVFrame (may be NULL to flush delayed frames)
 * @return number of bytes written to buf, or -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    /* split the output buffer between the slice threads, proportional to
       the number of macroblock rows each thread encodes */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;

    /* pick the next picture in coded order and decide its type */
    select_input_picture(s);

    /* output? */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);
vbv_retry:
        if (encode_picture(s, s->picture_number) < 0)
            return -1;

        /* export per-frame statistics to the codec context */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (ENABLE_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
            mjpeg_picture_trailer(s);

        /* VBV compliance: if the coded frame is too large, raise lambda
           and re-encode the whole frame */
        if(avctx->rc_buffer_size){
            RateControlContext *rcc= &s->rc_context;
            int max_size= rcc->buffer_index/3;

            if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
                s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
                if(s->adaptive_quant){
                    int i;
                    for(i=0; i<s->mb_height*s->mb_stride; i++)
                        s->lambda_table[i]= FFMAX(s->lambda_table[i]+1, s->lambda_table[i]*(s->qscale+1) / s->qscale);
                }
                /* undo per-frame state changes before re-encoding */
                s->mb_skipped = 0;        //done in MPV_frame_start()
                if(s->pict_type==P_TYPE){ //done in encode_picture() so we must undo it
                    if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
                        s->no_rounding ^= 1;
                }
                if(s->pict_type!=B_TYPE){
                    s->time_base= s->last_time_base;
                    s->last_non_b_time= s->time - s->pp_time;
                }
//                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
                /* rewind the per-thread bitstream writers */
                for(i=0; i<avctx->thread_count; i++){
                    PutBitContext *pb= &s->thread_context[i]->pb;
                    init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
                }
                goto vbv_retry;
            }

            assert(s->avctx->rc_max_rate);
        }

        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        for(i=0; i<4; i++){
            s->current_picture_ptr->error[i]= s->current_picture.error[i];
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* pad the frame with stuffing bytes if rate control asks for it */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                /* MPEG-1/2: zero stuffing bytes */
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4: stuffing start code followed by 0xFF bytes */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);

            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            /* patch the 16-bit vbv_delay field into the already-written
               picture header (the field straddles byte boundaries) */
            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* nothing coded this call (buffering B frames or frame skipped) */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    assert((s->frame_bits&7)==0);

    return s->frame_bits/8;
}
2652
2653 #endif //CONFIG_ENCODERS
2654
/**
 * Translation-only global motion compensation ("gmc1"): applies the single
 * global vector in s->sprite_offset to one 16x16 luma macroblock and the
 * two corresponding 8x8 chroma blocks.
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0; /* set when the chroma source needed edge emulation */

    /* luma: integer source position; the fraction stays in motion_x/y */
    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    /* rescale so the low 4 bits hold the fractional part regardless of
       the sprite warping accuracy */
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    /* clip to the padded picture; at the far edge drop the fraction */
    src_x = av_clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;
    src_y = av_clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            /* source reaches outside the picture: use an edge-replicated copy */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }

    if((motion_x|motion_y)&7){
        /* genuine sub-pel fraction: gmc1 interpolation, two 8x16 halves */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        int dxy;

        /* fraction is 0 or 8 (full/half pel): plain (no-)rounding copy */
        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if(s->flags&CODEC_FLAG_GRAY) return; /* grayscale-only: skip chroma */

    /* chroma: same procedure at half resolution */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = av_clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = av_clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    ptr = ref_picture[2] + offset;
    if(emu){
        /* Cr sits at the same position as Cb, so reuse the Cb decision */
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    return;
}
2740
2741 static inline void gmc_motion(MpegEncContext *s,
2742                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2743                                uint8_t **ref_picture)
2744 {
2745     uint8_t *ptr;
2746     int linesize, uvlinesize;
2747     const int a= s->sprite_warping_accuracy;
2748     int ox, oy;
2749
2750     linesize = s->linesize;
2751     uvlinesize = s->uvlinesize;
2752
2753     ptr = ref_picture[0];
2754
2755     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2756     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2757
2758     s->dsp.gmc(dest_y, ptr, linesize, 16,
2759            ox,
2760            oy,
2761            s->sprite_delta[0][0], s->sprite_delta[0][1],
2762            s->sprite_delta[1][0], s->sprite_delta[1][1],
2763            a+1, (1<<(2*a+1)) - s->no_rounding,
2764            s->h_edge_pos, s->v_edge_pos);
2765     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2766            ox + s->sprite_delta[0][0]*8,
2767            oy + s->sprite_delta[1][0]*8,
2768            s->sprite_delta[0][0], s->sprite_delta[0][1],
2769            s->sprite_delta[1][0], s->sprite_delta[1][1],
2770            a+1, (1<<(2*a+1)) - s->no_rounding,
2771            s->h_edge_pos, s->v_edge_pos);
2772
2773     if(s->flags&CODEC_FLAG_GRAY) return;
2774
2775     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2776     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2777
2778     ptr = ref_picture[1];
2779     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2780            ox,
2781            oy,
2782            s->sprite_delta[0][0], s->sprite_delta[0][1],
2783            s->sprite_delta[1][0], s->sprite_delta[1][1],
2784            a+1, (1<<(2*a+1)) - s->no_rounding,
2785            s->h_edge_pos>>1, s->v_edge_pos>>1);
2786
2787     ptr = ref_picture[2];
2788     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2789            ox,
2790            oy,
2791            s->sprite_delta[0][0], s->sprite_delta[0][1],
2792            s->sprite_delta[1][0], s->sprite_delta[1][1],
2793            a+1, (1<<(2*a+1)) - s->no_rounding,
2794            s->h_edge_pos>>1, s->v_edge_pos>>1);
2795 }
2796
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates
 * the border samples for the parts that lie outside the source picture.
 * @param buf destination buffer
 * @param src source buffer, already offset to the block's top-left sample
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int top, left, bottom, right;

    /* clamp the block position so it overlaps the picture by at least
       one sample in each direction, adjusting src accordingly */
    if (src_y >= h) {
        src   += (h - 1 - src_y) * linesize;
        src_y  = h - 1;
    } else if (src_y <= -block_h) {
        src   += (1 - block_h - src_y) * linesize;
        src_y  = 1 - block_h;
    }
    if (src_x >= w) {
        src   += w - 1 - src_x;
        src_x  = w - 1;
    } else if (src_x <= -block_w) {
        src   += 1 - block_w - src_x;
        src_x  = 1 - block_w;
    }

    /* extent of the part of the block that lies inside the picture */
    top    = src_y < 0 ? -src_y : 0;
    left   = src_x < 0 ? -src_x : 0;
    bottom = h - src_y < block_h ? h - src_y : block_h;
    right  = w - src_x < block_w ? w - src_x : block_w;

    /* copy the valid samples */
    for (y = top; y < bottom; y++)
        for (x = left; x < right; x++)
            buf[x + y*linesize] = src[x + y*linesize];

    /* replicate the first valid row upwards */
    for (y = 0; y < top; y++)
        for (x = left; x < right; x++)
            buf[x + y*linesize] = buf[x + top*linesize];

    /* replicate the last valid row downwards */
    for (y = bottom; y < block_h; y++)
        for (x = left; x < right; x++)
            buf[x + y*linesize] = buf[x + (bottom - 1)*linesize];

    /* replicate the leftmost / rightmost valid column sideways */
    for (y = 0; y < block_h; y++) {
        for (x = 0; x < left; x++)
            buf[x + y*linesize] = buf[left + y*linesize];
        for (x = right; x < block_w; x++)
            buf[x + y*linesize] = buf[right - 1 + y*linesize];
    }
}
2867
2868 static inline int hpel_motion(MpegEncContext *s,
2869                                   uint8_t *dest, uint8_t *src,
2870                                   int field_based, int field_select,
2871                                   int src_x, int src_y,
2872                                   int width, int height, int stride,
2873                                   int h_edge_pos, int v_edge_pos,
2874                                   int w, int h, op_pixels_func *pix_op,
2875                                   int motion_x, int motion_y)
2876 {
2877     int dxy;
2878     int emu=0;
2879
2880     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2881     src_x += motion_x >> 1;
2882     src_y += motion_y >> 1;
2883
2884     /* WARNING: do no forget half pels */
2885     src_x = av_clip(src_x, -16, width); //FIXME unneeded for emu?
2886     if (src_x == width)
2887         dxy &= ~1;
2888     src_y = av_clip(src_y, -16, height);
2889     if (src_y == height)
2890         dxy &= ~2;
2891     src += src_y * stride + src_x;
2892
2893     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2894         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2895            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2896             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2897                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2898             src= s->edge_emu_buffer;
2899             emu=1;
2900         }
2901     }
2902     if(field_select)
2903         src += s->linesize;
2904     pix_op[dxy](dest, src, stride, h);
2905     return emu;
2906 }
2907
2908 static inline int hpel_motion_lowres(MpegEncContext *s,
2909                                   uint8_t *dest, uint8_t *src,
2910                                   int field_based, int field_select,
2911                                   int src_x, int src_y,
2912                                   int width, int height, int stride,
2913                                   int h_edge_pos, int v_edge_pos,
2914                                   int w, int h, h264_chroma_mc_func *pix_op,
2915                                   int motion_x, int motion_y)
2916 {
2917     const int lowres= s->avctx->lowres;
2918     const int s_mask= (2<<lowres)-1;
2919     int emu=0;
2920     int sx, sy;
2921
2922     if(s->quarter_sample){
2923         motion_x/=2;
2924         motion_y/=2;
2925     }
2926
2927     sx= motion_x & s_mask;
2928     sy= motion_y & s_mask;
2929     src_x += motion_x >> (lowres+1);
2930     src_y += motion_y >> (lowres+1);
2931
2932     src += src_y * stride + src_x;
2933
2934     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2935        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2936         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2937                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2938         src= s->edge_emu_buffer;
2939         emu=1;
2940     }
2941
2942     sx <<= 2 - lowres;
2943     sy <<= 2 - lowres;
2944     if(field_select)
2945         src += s->linesize;
2946     pix_op[lowres](dest, src, stride, h, sx, sy);
2947     return emu;
2948 }
2949
/* apply one mpeg motion vector to the three components */
/**
 * Half-pel motion compensation of one macroblock (luma + both chroma
 * planes). The chroma vector derivation depends on the output format
 * and the chroma subsampling.
 * @param field_based   1 for field pictures (doubles the line stride)
 * @param bottom_field  write into the bottom field of dest
 * @param field_select  read from the bottom field of the reference
 */
static av_always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;

#if 0
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* low bit of each MV component selects the half-pel interpolation */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    /* derive the chroma vector / interpolation from the luma vector,
       depending on the output format and chroma subsampling */
    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* bug workaround: some encoders round the chroma MV differently */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* 4:2:0: chroma subsampled in both directions */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* source block reaches outside the picture? */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                /* MPEG-1/2 forbid MVs pointing outside the picture */
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            /* build edge-replicated copies of the three source blocks */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
    if(s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
#endif
}
3066
/**
 * Apply one MPEG motion vector to the three components,
 * lowres (decimated-resolution decoding) variant.
 * @param field_based    1 when predicting one field (vertical sizes/strides are halved/doubled accordingly)
 * @param bottom_field   1 to write into the bottom-field lines of dest (dest advanced by one line)
 * @param field_select   which field of the reference to read (ptr advanced by one line when set)
 * @param ref_picture    array[3] of pointers to the Y/Cb/Cr reference planes
 * @param pix_op         h264-style chroma MC functions indexed by log2 block size
 * @param h              height of the predicted area in lowres luma pixels
 */
static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;          // lowres luma block size (8, 4, 2, ...)
    const int s_mask= (2<<lowres)-1;       // mask for the sub-pel fraction bits at this lowres level
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split vector into integer position and sub-pel fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        /* MPEG-style chroma: half the luma vector */
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the read would run outside the padded reference, copy through the
     * edge-emulation buffer instead */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale the sub-pel fraction to the 1/8-pel scale pix_op expects */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
3163
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Blend one 8x8 block for overlapped block motion compensation.
 * src[] holds five 8x8 predictions: mid (current block's MV) plus the
 * predictions made with the top/left/right/bottom neighbours' MVs.
 * Each output pixel is a weighted sum of the five predictions; the
 * weights of every OBMC_FILTER invocation sum to 8, with +4 rounding
 * before the >>3 normalization.
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    /* the weight tables below are laid out row by row; OBMC_FILTER4 fills a
     * 2x2 cell, so the 8x8 block is covered in a mix of 1- and 4-pixel steps */
    x=0;
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    x+= stride;
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    x+= stride;
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    x+= 2*stride;
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    x+= 2*stride;
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    x+= stride;
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
}
3215
3216 /* obmc for 1 8x8 luma block */
3217 static inline void obmc_motion(MpegEncContext *s,
3218                                uint8_t *dest, uint8_t *src,
3219                                int src_x, int src_y,
3220                                op_pixels_func *pix_op,
3221                                int16_t mv[5][2]/* mid top left right bottom*/)
3222 #define MID    0
3223 {
3224     int i;
3225     uint8_t *ptr[5];
3226
3227     assert(s->quarter_sample==0);
3228
3229     for(i=0; i<5; i++){
3230         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3231             ptr[i]= ptr[MID];
3232         }else{
3233             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3234             hpel_motion(s, ptr[i], src, 0, 0,
3235                         src_x, src_y,
3236                         s->width, s->height, s->linesize,
3237                         s->h_edge_pos, s->v_edge_pos,
3238                         8, 8, pix_op,
3239                         mv[i][0], mv[i][1]);
3240         }
3241     }
3242
3243     put_obmc(dest, ptr, s->linesize);
3244 }
3245
/**
 * Quarter-pel motion compensation of one macroblock.
 * Luma is predicted at quarter-pel accuracy via qpix_op; the chroma
 * vector is derived from the luma vector (with optional encoder-bug
 * workarounds) and applied at half-pel accuracy via pix_op.
 * @param field_based  1 for field prediction (strides doubled, heights halved)
 * @param bottom_field 1 to write the bottom-field lines of dest
 * @param field_select which field of the reference to read
 * @param h            height of the predicted luma area in pixels
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    /* dxy selects one of the 16 quarter-pel interpolation functions */
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    /* derive the half-pel chroma vector; the two FF_BUG_QPEL_CHROMA paths
     * reproduce the rounding of known-buggy encoders */
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* fall back to the edge-emulation buffer when the read crosses the
     * padded picture border */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3332
3333 inline int ff_h263_round_chroma(int x){
3334     if (x >= 0)
3335         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3336     else {
3337         x = -x;
3338         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3339     }
3340 }
3341
/**
 * h263 chroma 4mv motion compensation.
 * mx/my arrive as the sum of the four luma block vectors (see the callers
 * in MPV_motion); a single chroma vector is derived from them with the
 * special h263 rounding, then both 8x8 chroma planes are copied at
 * half-pel accuracy.
 */
static inline void chroma_4mv_motion(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     op_pixels_func *pix_op,
                                     int mx, int my){
    int dxy, emu=0, src_x, src_y, offset;
    uint8_t *ptr;

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    dxy = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    /* clip to the picture; on the right/bottom border also drop the
     * half-pel bit so no pixel beyond the edge is read */
    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    src_x = av_clip(src_x, -8, s->width/2);
    if (src_x == s->width/2)
        dxy &= ~1;
    src_y = av_clip(src_y, -8, s->height/2);
    if (src_y == s->height/2)
        dxy &= ~2;

    offset = (src_y * (s->uvlinesize)) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    /* Cr uses the same offset; the emu flag from Cb tells us whether the
     * edge-emulation copy is needed again */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}
3390
/**
 * Lowres variant of chroma_4mv_motion: derive one chroma vector from the
 * sum of the four luma vectors (mx/my) and copy both chroma planes with
 * the h264-style chroma MC functions at decimated resolution.
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;          // lowres chroma block size
    const int s_mask= (2<<lowres)-1;       // sub-pel fraction mask at this level
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    /* split into integer position and sub-pel fraction */
    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    /* rescale the fraction to the 1/8-pel scale pix_op expects */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    /* Cr reuses the offset and the emu decision made for Cb */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
3440
3441 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3442     /* fetch pixels for estimated mv 4 macroblocks ahead
3443      * optimized for 64byte cache lines */
3444     const int shift = s->quarter_sample ? 2 : 1;
3445     const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
3446     const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
3447     int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3448     s->dsp.prefetch(pix[0]+off, s->linesize, 4);
3449     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3450     s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
3451 }
3452
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    prefetch_motion(s, ref_picture, dir);

    if(s->obmc && s->pict_type != B_TYPE){
        /* overlapped block motion compensation: build a 4x4 cache of the
         * current MB's four 8x8 MVs surrounded by the neighbouring MBs' MVs,
         * then blend each 8x8 luma block with its neighbours' predictions */
        int16_t mv_cache[4][4][2];
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
        const int mot_stride= s->b8_stride;
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;

        assert(!s->mb_skipped);

        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
        /* NOTE(review): row 3 duplicates row 2 — presumably because the MBs
         * below are not decoded yet, so the bottom edge is replicated; confirm */
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);

        /* top row: replicate when above the picture or the MB above is intra */
        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
        }else{
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
        }

        /* left column */
        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
        }else{
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
        }

        /* right column */
        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
        }else{
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
        }

        mx = 0;
        my = 0;
        for(i=0;i<4;i++) {
            const int x= (i&1)+1;
            const int y= (i>>1)+1;
            int16_t mv[5][2]= {
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
            //FIXME cleanup
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                        pix_op[1],
                        mv);

            mx += mv[0][0];
            my += mv[0][1];
        }
        /* chroma uses a single vector derived from the sum of the 4 luma MVs */
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);

        return;
    }

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        if(s->mcsel){
            /* global motion compensation */
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else if(ENABLE_WMV2 && s->mspel){
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else
        {
            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;

                /* WARNING: do not forget half pels */
                src_x = av_clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = av_clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;

                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                            ref_picture[0], 0, 0,
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos, s->v_edge_pos,
                            8, 8, pix_op[1],
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            if(s->quarter_sample){
                for(i=0; i<2; i++){
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
                }
            }else{
                /* top field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 0, s->field_select[dir][0],
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 1, s->field_select[dir][1],
                            ref_picture, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        } else {
            /* when the selected field is in the current (partially decoded)
             * frame, reference the current picture instead */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);

            dest_y += 16*s->linesize;
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                }
                pix_op = s->dsp.avg_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);

                // after put we make avg of the same block
                pix_op=s->dsp.avg_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3703
/**
 * motion compensation of a single macroblock, lowres variant
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;          // lowres 8x8-block size

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        /* chroma uses a single vector derived from the sum of the 4 luma MVs */
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* when the selected field is in the current (partially decoded)
             * frame, reference the current picture instead */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3829
3830 /* put block[] to dest[] */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    /* Dequantize intra block i in place, then inverse-transform it and
     * store (overwrite) the spatial-domain result at dest. */
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
3837
3838 /* add block[] to dest[] */
3839 static inline void add_dct(MpegEncContext *s,
3840                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3841 {
3842     if (s->block_last_index[i] >= 0) {
3843         s->dsp.idct_add (dest, line_size, block);
3844     }
3845 }
3846
3847 static inline void add_dequant_dct(MpegEncContext *s,
3848                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3849 {
3850     if (s->block_last_index[i] >= 0) {
3851         s->dct_unquantize_inter(s, block, i, qscale);
3852
3853         s->dsp.idct_add (dest, line_size, block);
3854     }
3855 }
3856
3857 /**
3858  * cleans dc, ac, coded_block for the current non intra MB
3859  */
3860 void ff_clean_intra_table_entries(MpegEncContext *s)
3861 {
3862     int wrap = s->b8_stride;
3863     int xy = s->block_index[0];
3864
3865     s->dc_val[0][xy           ] =
3866     s->dc_val[0][xy + 1       ] =
3867     s->dc_val[0][xy     + wrap] =
3868     s->dc_val[0][xy + 1 + wrap] = 1024;
3869     /* ac pred */
3870     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3871     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3872     if (s->msmpeg4_version>=3) {
3873         s->coded_block[xy           ] =
3874         s->coded_block[xy + 1       ] =
3875         s->coded_block[xy     + wrap] =
3876         s->coded_block[xy + 1 + wrap] = 0;
3877     }
3878     /* chroma */
3879     wrap = s->mb_stride;
3880     xy = s->mb_x + s->mb_y * wrap;
3881     s->dc_val[1][xy] =
3882     s->dc_val[2][xy] = 1024;
3883     /* ac pred */
3884     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3885     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3886
3887     s->mbintra_table[xy]= 0;
3888 }
3889
3890 /* generic function called after a macroblock has been parsed by the
3891    decoder or after it has been encoded by the encoder.
3892
3893    Important variables used:
3894    s->mb_intra : true if intra macroblock
3895    s->mv_dir   : motion vector direction
3896    s->mv_type  : motion vector type
3897    s->mv       : motion vector
3898    s->interlaced_dct : true if interlaced dct used (mpeg2)
3899  */
static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    /* With XvMC hardware acceleration the blocks are handled by the
       hardware path; nothing else to do here. */
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            /* H.263-style prediction keeps per-MB state; flush it if the
               co-located MB was intra previously. */
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* Interlaced DCT interleaves the two fields: line stride doubles and
           the second pair of luma blocks starts one line down. */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* Reconstruct into a scratch buffer; copied back after IDCT. */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        /* Backward prediction (if any) must average on top. */
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    op_qpix= s->me.qpel_put;
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        /* Switch to averaging for the backward direction. */
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->me.qpel_avg;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if (s->chroma_y_shift){
                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_linesize >>= 1;
                        dct_offset >>=1;
                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                /* These codecs deliver already-dequantized coefficients. */
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else if (ENABLE_WMV2) {
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_offset >>=1;
                        dct_linesize >>=1;
                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            }else{
                /* MPEG-1/2 intra blocks are already dequantized: IDCT only. */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        /* If we reconstructed into the scratchpad, copy back to the frame. */
        if(!readable){
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
4140
4141 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4142     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4143     else                  MPV_decode_mb_internal(s, block, 0);
4144 }
4145
4146 #ifdef CONFIG_ENCODERS
4147
/**
 * Encoder heuristic: zero out a block that contains only a few isolated
 * +-1 coefficients, since coding them costs more than the quality gained.
 * @param n         block index
 * @param threshold elimination threshold; if negative, the DC coefficient
 *                  is also considered for elimination (and the absolute
 *                  value is used as the threshold)
 */
static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
{
    /* Weight of a +-1 coefficient as a function of the preceding zero run;
       coefficients early in scan order (short runs) count more. */
    static const char tab[64]=
        {3,2,2,1,1,1,1,1,
         1,1,1,1,1,1,1,1,
         1,1,1,1,1,1,1,1,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0};
    int score=0;
    int run=0;
    int i;
    DCTELEM *block= s->block[n];
    const int last_index= s->block_last_index[n];
    int skip_dc;

    if(threshold<0){
        skip_dc=0;
        threshold= -threshold;
    }else
        skip_dc=1;

    /* are all the coefficients we could set to zero already zero? */
    if(last_index<=skip_dc - 1) return;

    /* Score the block; bail out early if any coefficient exceeds +-1,
       as such blocks are never eliminated. */
    for(i=0; i<=last_index; i++){
        const int j = s->intra_scantable.permutated[i];
        const int level = FFABS(block[j]);
        if(level==1){
            if(skip_dc && i==0) continue;
            score+= tab[run];
            run=0;
        }else if(level>1){
            return;
        }else{
            run++;
        }
    }
    if(score >= threshold) return;
    /* Below threshold: clear every eliminable coefficient. */
    for(i=skip_dc; i<=last_index; i++){
        const int j = s->intra_scantable.permutated[i];
        block[j]=0;
    }
    if(block[0]) s->block_last_index[n]= 0;
    else         s->block_last_index[n]= -1;
}
4196
4197 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4198 {
4199     int i;
4200     const int maxlevel= s->max_qcoeff;
4201     const int minlevel= s->min_qcoeff;
4202     int overflow=0;
4203
4204     if(s->mb_intra){
4205         i=1; //skip clipping of intra dc
4206     }else
4207         i=0;
4208
4209     for(;i<=last_index; i++){
4210         const int j= s->intra_scantable.permutated[i];
4211         int level = block[j];
4212
4213         if     (level>maxlevel){
4214             level=maxlevel;
4215             overflow++;
4216         }else if(level<minlevel){
4217             level=minlevel;
4218             overflow++;
4219         }
4220
4221         block[j]= level;
4222     }
4223
4224     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4225         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4226 }
4227
4228 #endif //CONFIG_ENCODERS
4229
4230 /**
4231  *
4232  * @param h is the normal height, this will be reduced automatically if needed for the last row
4233  */
4234 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4235     if (s->avctx->draw_horiz_band) {
4236         AVFrame *src;
4237         int offset[4];
4238
4239         if(s->picture_structure != PICT_FRAME){
4240             h <<= 1;
4241             y <<= 1;
4242             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4243         }
4244
4245         h= FFMIN(h, s->avctx->height - y);
4246
4247         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4248             src= (AVFrame*)s->current_picture_ptr;
4249         else if(s->last_picture_ptr)
4250             src= (AVFrame*)s->last_picture_ptr;
4251         else
4252             return;
4253
4254         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4255             offset[0]=
4256             offset[1]=
4257             offset[2]=
4258             offset[3]= 0;
4259         }else{
4260             offset[0]= y * s->linesize;;
4261             offset[1]=
4262             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4263             offset[3]= 0;
4264         }
4265
4266         emms_c();
4267
4268         s->avctx->draw_horiz_band(s->avctx, src, offset,
4269                                   y, s->picture_structure, h);
4270     }
4271 }
4272
4273 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4274     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4275     const int uvlinesize= s->current_picture.linesize[1];
4276     const int mb_size= 4 - s->avctx->lowres;
4277
4278     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4279     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4280     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4281     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4282     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4283     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4284     //block_index is not used by mpeg2, so it is not affected by chroma_format
4285
4286     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4287     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4288     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4289
4290     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4291     {
4292         s->dest[0] += s->mb_y *   linesize << mb_size;
4293         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4294         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4295     }
4296 }
4297
4298 #ifdef CONFIG_ENCODERS
4299
/* Computes a per-coefficient perceptual weight for one 8x8 block from the
   local pixel variance: flat areas get small weights, busy areas large ones. */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int x, y;
//FIXME optimize
    for(y=0; y<8; y++){
        const int y_lo = FFMAX(y-1, 0);
        const int y_hi = FFMIN(8, y+2);
        for(x=0; x<8; x++){
            const int x_lo = FFMAX(x-1, 0);
            const int x_hi = FFMIN(8, x+2);
            int sum=0, sqr=0, count=0;
            int x2, y2;

            /* Accumulate over the (up to) 3x3 neighbourhood of (x,y),
               clipped to the block borders. */
            for(y2=y_lo; y2<y_hi; y2++){
                for(x2=x_lo; x2<x_hi; x2++){
                    const int v = ptr[x2 + y2*stride];
                    sum += v;
                    sqr += v*v;
                    count++;
                }
            }
            /* Scaled local standard-deviation measure. */
            weight[x + 8*y] = (36*ff_sqrt(count*sqr - sum*sum)) / count;
        }
    }
}
4322
4323 static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4324 {
4325     int16_t weight[8][64];
4326     DCTELEM orig[8][64];
4327     const int mb_x= s->mb_x;
4328     const int mb_y= s->mb_y;
4329     int i;
4330     int skip_dct[8];
4331     int dct_offset   = s->linesize*8; //default for progressive frames
4332     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4333     int wrap_y, wrap_c;
4334
4335     for(i=0; i<mb_block_count; i++) skip_dct[i]=s->skipdct;
4336
4337     if(s->adaptive_quant){
4338         const int last_qp= s->qscale;
4339         const int mb_xy= mb_x + mb_y*s->mb_stride;
4340
4341         s->lambda= s->lambda_table[mb_xy];
4342         update_qscale(s);
4343
4344         if(!(s->flags&CODEC_FLAG_QP_RD)){
4345             s->qscale= s->current_picture_ptr->qscale_table[mb_xy];
4346             s->dquant= s->qscale - last_qp;
4347
4348             if(s->out_format==FMT_H263){
4349                 s->dquant= av_clip(s->dquant, -2, 2);
4350
4351                 if(s->codec_id==CODEC_ID_MPEG4){
4352                     if(!s->mb_intra){
4353                         if(s->pict_type == B_TYPE){
4354                             if(s->dquant&1 || s->mv_dir&MV_DIRECT)
4355                                 s->dquant= 0;
4356                         }
4357                         if(s->mv_type==MV_TYPE_8X8)
4358                             s->dquant=0;
4359                     }
4360                 }
4361             }
4362         }
4363         ff_set_qscale(s, last_qp + s->dquant);
4364     }else if(s->flags&CODEC_FLAG_QP_RD)
4365         ff_set_qscale(s, s->qscale + s->dquant);
4366
4367     wrap_y = s->linesize;
4368     wrap_c = s->uvlinesize;
4369     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4370     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4371     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4372
4373     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4374         uint8_t *ebuf= s->edge_emu_buffer + 32;
4375         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4376         ptr_y= ebuf;
4377         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4378         ptr_cb= ebuf+18*wrap_y;
4379         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4380         ptr_cr= ebuf+18*wrap_y+8;
4381     }
4382
4383     if (s->mb_intra) {
4384         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4385             int progressive_score, interlaced_score;
4386
4387             s->interlaced_dct=0;
4388             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4389                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4390
4391             if(progressive_score > 0){
4392                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4393                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4394                 if(progressive_score > interlaced_score){
4395                     s->interlaced_dct=1;
4396
4397                     dct_offset= wrap_y;
4398                     wrap_y<<=1;
4399                     if (s->chroma_format == CHROMA_422)
4400                         wrap_c<<=1;
4401                 }
4402             }
4403         }
4404
4405         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4406         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4407         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4408         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4409
4410         if(s->flags&CODEC_FLAG_GRAY){
4411             skip_dct[4]= 1;
4412             skip_dct[5]= 1;
4413         }else{
4414             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4415             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4416             if(!s->chroma_y_shift){ /* 422 */
4417                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4418                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4419             }
4420         }
4421     }else{
4422         op_pixels_func (*op_pix)[4];
4423         qpel_mc_func (*op_qpix)[16];
4424         uint8_t *dest_y, *dest_cb, *dest_cr;
4425
4426         dest_y  = s->dest[0];
4427         dest_cb = s->dest[1];
4428         dest_cr = s->dest[2];
4429
4430         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4431             op_pix = s->dsp.put_pixels_tab;
4432             op_qpix= s->dsp.put_qpel_pixels_tab;
4433         }else{
4434             op_pix = s->dsp.put_no_rnd_pixels_tab;
4435             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4436         }
4437
4438         if (s->mv_dir & MV_DIR_FORWARD) {
4439             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4440             op_pix = s->dsp.avg_pixels_tab;
4441             op_qpix= s->dsp.avg_qpel_pixels_tab;
4442         }
4443         if (s->mv_dir & MV_DIR_BACKWARD) {
4444             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4445         }
4446
4447         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4448             int progressive_score, interlaced_score;
4449
4450             s->interlaced_dct=0;
4451             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4452                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4453
4454             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4455
4456             if(progressive_score>0){
4457                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4458                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4459
4460                 if(progressive_score > interlaced_score){
4461                     s->interlaced_dct=1;
4462
4463                     dct_offset= wrap_y;
4464                     wrap_y<<=1;
4465                     if (s->chroma_format == CHROMA_422)
4466                         wrap_c<<=1;
4467                 }
4468             }
4469         }
4470
4471         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4472         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4473         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4474         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4475
4476         if(s->flags&CODEC_FLAG_GRAY){
4477             skip_dct[4]= 1;
4478             skip_dct[5]= 1;
4479         }else{
4480             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4481             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4482             if(!s->chroma_y_shift){ /* 422 */
4483                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4484                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4485             }
4486         }
4487         /* pre quantization */
4488         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4489             //FIXME optimize
4490             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4491             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4492             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4493             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4494             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4495             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4496             if(!s->chroma_y_shift){ /* 422 */
4497                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4498                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4499             }
4500         }
4501     }
4502
4503     if(s->avctx->quantizer_noise_shaping){
4504         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4505         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4506         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4507         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4508         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4509         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4510         if(!s->chroma_y_shift){ /* 422 */
4511             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4512             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4513         }
4514         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4515     }
4516
4517     /* DCT & quantize */
4518     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4519     {
4520         for(i=0;i<mb_block_count;i++) {
4521             if(!skip_dct[i]){
4522                 int overflow;
4523                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4524             // FIXME we could decide to change to quantizer instead of clipping
4525             // JS: I don't think that would be a good idea it could lower quality instead
4526             //     of improve it. Just INTRADC clipping deserves changes in quantizer
4527                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4528             }else
4529                 s->block_last_index[i]= -1;
4530         }
4531         if(s->avctx->quantizer_noise_shaping){
4532             for(i=0;i<mb_block_count;i++) {
4533                 if(!skip_dct[i]){
4534                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4535                 }
4536             }
4537         }
4538
4539         if(s->luma_elim_threshold && !s->mb_intra)
4540             for(i=0; i<4; i++)
4541                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4542         if(s->chroma_elim_threshold && !s->mb_intra)
4543             for(i=4; i<mb_block_count; i++)
4544                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4545
4546         if(s->flags & CODEC_FLAG_CBP_RD){
4547             for(i=0;i<mb_block_count;i++) {
4548                 if(s->block_last_index[i] == -1)
4549                     s->coded_score[i]= INT_MAX/256;
4550             }
4551         }
4552     }
4553
4554     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4555         s->block_last_index[4]=
4556         s->block_last_index[5]= 0;
4557         s->block[4][0]=
4558         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4559     }
4560
4561     //non c quantize code returns incorrect block_last_index FIXME
4562     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4563         for(i=0; i<mb_block_count; i++){
4564             int j;
4565             if(s->block_last_index[i]>0){
4566                 for(j=63; j>0; j--){
4567                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4568                 }
4569                 s->block_last_index[i]= j;
4570             }
4571         }
4572     }
4573
4574     /* huffman encode */
4575     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4576     case CODEC_ID_MPEG1VIDEO:
4577     case CODEC_ID_MPEG2VIDEO:
4578         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4579     case CODEC_ID_MPEG4:
4580         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4581     case CODEC_ID_MSMPEG4V2:
4582     case CODEC_ID_MSMPEG4V3:
4583     case CODEC_ID_WMV1:
4584         if (ENABLE_MSMPEG4_ENCODER)
4585             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
4586         break;
4587     case CODEC_ID_WMV2:
4588         if (ENABLE_WMV2_ENCODER)
4589             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
4590         break;
4591 #ifdef CONFIG_H261_ENCODER
4592     case CODEC_ID_H261:
4593         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4594 #endif
4595     case CODEC_ID_H263:
4596     case CODEC_ID_H263P:
4597     case CODEC_ID_FLV1:
4598     case CODEC_ID_RV10:
4599     case CODEC_ID_RV20:
4600         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4601     case CODEC_ID_MJPEG:
4602         if (ENABLE_MJPEG_ENCODER)
4603         mjpeg_encode_mb(s, s->block); break;
4604     default:
4605         assert(0);
4606     }
4607 }
4608
4609 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4610 {
4611     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4612     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4613 }
4614
4615 #endif //CONFIG_ENCODERS
4616
4617 void ff_mpeg_flush(AVCodecContext *avctx){
4618     int i;
4619     MpegEncContext *s = avctx->priv_data;
4620
4621     if(s==NULL || s->picture==NULL)
4622         return;
4623
4624     for(i=0; i<MAX_PICTURE_COUNT; i++){
4625        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4626                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4627         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4628     }
4629     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4630
4631     s->mb_x= s->mb_y= 0;
4632
4633     s->parse_context.state= -1;
4634     s->parse_context.frame_start_found= 0;
4635     s->parse_context.overread= 0;
4636     s->parse_context.overread_index= 0;
4637     s->parse_context.index= 0;
4638     s->parse_context.last_index= 0;
4639     s->bitstream_buffer_size=0;
4640     s->pp_time=0;
4641 }
4642
4643 #ifdef CONFIG_ENCODERS
/**
 * Copy an arbitrary number of bits from src into the bitstream writer.
 *
 * @param pb     destination bit writer
 * @param src    source buffer, consumed as big-endian 16-bit words
 * @param length number of bits to copy
 */
void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
{
    const uint16_t *srcw= (uint16_t*)src;
    int words= length>>4;       // number of whole 16-bit words
    int bits= length&15;        // trailing partial word, 0..15 bits
    int i;

    if(length==0) return;

    if(words < 16){
        // short copy: a plain 16-bit put loop is cheapest
        for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
    }else if(put_bits_count(pb)&7){
        // destination is not byte aligned, so memcpy cannot be used
        for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
    }else{
        // long and byte aligned: emit single bytes until the writer hits a
        // 32-bit boundary, then bulk-copy the rest and skip the writer past it
        for(i=0; put_bits_count(pb)&31; i++)
            put_bits(pb, 8, src[i]);
        flush_put_bits(pb);
        memcpy(pbBufPtr(pb), src+i, 2*words-i);
        skip_put_bytes(pb, 2*words-i);
    }

    // NOTE(review): this reads 2 bytes at srcw[words] even when bits==0 —
    // assumes src stays readable one word past the copied data; confirm callers
    put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
}
4667
4668 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4669     int i;
4670
4671     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4672
4673     /* mpeg1 */
4674     d->mb_skip_run= s->mb_skip_run;
4675     for(i=0; i<3; i++)
4676         d->last_dc[i]= s->last_dc[i];
4677
4678     /* statistics */
4679     d->mv_bits= s->mv_bits;
4680     d->i_tex_bits= s->i_tex_bits;
4681     d->p_tex_bits= s->p_tex_bits;
4682     d->i_count= s->i_count;
4683     d->f_count= s->f_count;
4684     d->b_count= s->b_count;
4685     d->skip_count= s->skip_count;
4686     d->misc_bits= s->misc_bits;
4687     d->last_bits= 0;
4688
4689     d->mb_skipped= 0;
4690     d->qscale= s->qscale;
4691     d->dquant= s->dquant;
4692 }
4693
4694 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4695     int i;
4696
4697     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4698     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4699
4700     /* mpeg1 */
4701     d->mb_skip_run= s->mb_skip_run;
4702     for(i=0; i<3; i++)
4703         d->last_dc[i]= s->last_dc[i];
4704
4705     /* statistics */
4706     d->mv_bits= s->mv_bits;
4707     d->i_tex_bits= s->i_tex_bits;
4708     d->p_tex_bits= s->p_tex_bits;
4709     d->i_count= s->i_count;
4710     d->f_count= s->f_count;
4711     d->b_count= s->b_count;
4712     d->skip_count= s->skip_count;
4713     d->misc_bits= s->misc_bits;
4714
4715     d->mb_intra= s->mb_intra;
4716     d->mb_skipped= s->mb_skipped;
4717     d->mv_type= s->mv_type;
4718     d->mv_dir= s->mv_dir;
4719     d->pb= s->pb;
4720     if(s->data_partitioning){
4721         d->pb2= s->pb2;
4722         d->tex_pb= s->tex_pb;
4723     }
4724     d->block= s->block;
4725     for(i=0; i<8; i++)
4726         d->block_last_index[i]= s->block_last_index[i];
4727     d->interlaced_dct= s->interlaced_dct;
4728     d->qscale= s->qscale;
4729 }
4730
/**
 * Encode the current macroblock with one candidate coding mode and keep it
 * if it beats the best score so far (rate, or rate-distortion when
 * mb_decision is FF_MB_DECISION_RD).
 *
 * Bits go into the scratch writer pb[*next_block] (plus pb2/tex_pb when
 * data partitioning is enabled); the caller later copies the winning
 * buffer into the real bitstream.
 *
 * @param s          working context, already set up for the candidate mode
 * @param backup     state snapshot taken before trying any candidate
 * @param best       receives the state of the best candidate found so far
 * @param type       candidate type (CANDIDATE_MB_TYPE_*), forwarded to the
 *                   context copy helpers
 * @param dmin       in/out: lowest score so far
 * @param next_block in/out: which of the two scratch buffer sets to use;
 *                   toggled when this candidate becomes the new best
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int score;
    uint8_t *dest_backup[3];

    copy_context_before_encode(s, backup, type);

    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }

    if(*next_block){
        /* this trial must not overwrite the reconstruction of the current
           best candidate, so reconstruct into the scratchpad instead */
        memcpy(dest_backup, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);

    /* rate part of the score: bits used by this candidate */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }

    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        /* full RD: reconstruct the macroblock and add the lambda-weighted
           distortion to the bit count */
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }

    if(*next_block){
        memcpy(s->dest, dest_backup, sizeof(s->dest));
    }

    if(score<*dmin){
        *dmin= score;
        *next_block^=1;

        copy_context_after_encode(best, s, type);
    }
}
4781
4782 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4783     uint32_t *sq = ff_squareTbl + 256;
4784     int acc=0;
4785     int x,y;
4786
4787     if(w==16 && h==16)
4788         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4789     else if(w==8 && h==8)
4790         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4791
4792     for(y=0; y<h; y++){
4793         for(x=0; x<w; x++){
4794             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4795         }
4796     }
4797
4798     assert(acc>=0);
4799
4800     return acc;
4801 }
4802
4803 static int sse_mb(MpegEncContext *s){
4804     int w= 16;
4805     int h= 16;
4806
4807     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4808     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4809
4810     if(w==16 && h==16)
4811       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4812         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4813                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4814                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4815       }else{
4816         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4817                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4818                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4819       }
4820     else
4821         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4822                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4823                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4824 }
4825
4826 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4827     MpegEncContext *s= arg;
4828
4829
4830     s->me.pre_pass=1;
4831     s->me.dia_size= s->avctx->pre_dia_size;
4832     s->first_slice_line=1;
4833     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4834         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4835             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4836         }
4837         s->first_slice_line=0;
4838     }
4839
4840     s->me.pre_pass=0;
4841
4842     return 0;
4843 }
4844
4845 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4846     MpegEncContext *s= arg;
4847
4848     ff_check_alignment();
4849
4850     s->me.dia_size= s->avctx->dia_size;
4851     s->first_slice_line=1;
4852     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4853         s->mb_x=0; //for block init below
4854         ff_init_block_index(s);
4855         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4856             s->block_index[0]+=2;
4857             s->block_index[1]+=2;
4858             s->block_index[2]+=2;
4859             s->block_index[3]+=2;
4860
4861             /* compute motion vector & mb_type and store in context */
4862             if(s->pict_type==B_TYPE)
4863                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4864             else
4865                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4866         }
4867         s->first_slice_line=0;
4868     }
4869     return 0;
4870 }
4871
4872 static int mb_var_thread(AVCodecContext *c, void *arg){
4873     MpegEncContext *s= arg;
4874     int mb_x, mb_y;
4875
4876     ff_check_alignment();
4877
4878     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4879         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4880             int xx = mb_x * 16;
4881             int yy = mb_y * 16;
4882             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4883             int varc;
4884             int sum = s->dsp.pix_sum(pix, s->linesize);
4885
4886             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4887
4888             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4889             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4890             s->me.mb_var_sum_temp    += varc;
4891         }
4892     }
4893     return 0;
4894 }
4895
/**
 * Terminate the current slice/packet: merge MPEG-4 data partitions if in
 * use, emit codec specific stuffing, then byte-align and flush the writer.
 */
static void write_slice_end(MpegEncContext *s){
    if(s->codec_id==CODEC_ID_MPEG4){
        if(s->partitioned_frame){
            ff_mpeg4_merge_partitions(s);
        }

        ff_mpeg4_stuffing(&s->pb);
    }else if(ENABLE_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
        ff_mjpeg_stuffing(&s->pb);
    }

    align_put_bits(&s->pb);
    flush_put_bits(&s->pb);

    /* pass1 statistics: count slice termination overhead as misc bits */
    if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
        s->misc_bits+= get_bits_diff(s);
}
4913
4914 static int encode_thread(AVCodecContext *c, void *arg){
4915     MpegEncContext *s= arg;
4916     int mb_x, mb_y, pdif = 0;
4917     int i, j;
4918     MpegEncContext best_s, backup_s;
4919     uint8_t bit_buf[2][MAX_MB_BYTES];
4920     uint8_t bit_buf2[2][MAX_MB_BYTES];
4921     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4922     PutBitContext pb[2], pb2[2], tex_pb[2];
4923 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4924
4925     ff_check_alignment();
4926
4927     for(i=0; i<2; i++){
4928         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4929         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4930         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4931     }
4932
4933     s->last_bits= put_bits_count(&s->pb);
4934     s->mv_bits=0;
4935     s->misc_bits=0;
4936     s->i_tex_bits=0;
4937     s->p_tex_bits=0;
4938     s->i_count=0;
4939     s->f_count=0;
4940     s->b_count=0;
4941     s->skip_count=0;
4942
4943     for(i=0; i<3; i++){
4944         /* init last dc values */
4945         /* note: quant matrix value (8) is implied here */
4946         s->last_dc[i] = 128 << s->intra_dc_precision;
4947
4948         s->current_picture.error[i] = 0;
4949     }
4950     s->mb_skip_run = 0;
4951     memset(s->last_mv, 0, sizeof(s->last_mv));
4952
4953     s->last_mv_dir = 0;
4954
4955     switch(s->codec_id){
4956     case CODEC_ID_H263:
4957     case CODEC_ID_H263P:
4958     case CODEC_ID_FLV1:
4959         s->gob_index = ff_h263_get_gob_height(s);
4960         break;
4961     case CODEC_ID_MPEG4:
4962         if(s->partitioned_frame)
4963             ff_mpeg4_init_partitions(s);
4964         break;
4965     }
4966
4967     s->resync_mb_x=0;
4968     s->resync_mb_y=0;
4969     s->first_slice_line = 1;
4970     s->ptr_lastgob = s->pb.buf;
4971     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4972 //    printf("row %d at %X\n", s->mb_y, (int)s);
4973         s->mb_x=0;
4974         s->mb_y= mb_y;
4975
4976         ff_set_qscale(s, s->qscale);
4977         ff_init_block_index(s);
4978
4979         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4980             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4981             int mb_type= s->mb_type[xy];
4982 //            int d;
4983             int dmin= INT_MAX;
4984             int dir;
4985
4986             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4987                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4988                 return -1;
4989             }
4990             if(s->data_partitioning){
4991                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4992                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4993                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4994                     return -1;
4995                 }
4996             }
4997
4998             s->mb_x = mb_x;
4999             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
5000             ff_update_block_index(s);
5001
5002 #ifdef CONFIG_H261_ENCODER
5003             if(s->codec_id == CODEC_ID_H261){
5004                 ff_h261_reorder_mb_index(s);
5005                 xy= s->mb_y*s->mb_stride + s->mb_x;
5006                 mb_type= s->mb_type[xy];
5007             }
5008 #endif
5009
5010             /* write gob / video packet header  */
5011             if(s->rtp_mode){
5012                 int current_packet_size, is_gob_start;
5013
5014                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
5015
5016                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
5017
5018                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
5019
5020                 switch(s->codec_id){
5021                 case CODEC_ID_H263:
5022                 case CODEC_ID_H263P:
5023                     if(!s->h263_slice_structured)
5024                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
5025                     break;
5026                 case CODEC_ID_MPEG2VIDEO:
5027                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
5028                 case CODEC_ID_MPEG1VIDEO:
5029                     if(s->mb_skip_run) is_gob_start=0;
5030                     break;
5031                 }
5032
5033                 if(is_gob_start){
5034                     if(s->start_mb_y != mb_y || mb_x!=0){
5035                         write_slice_end(s);
5036
5037                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
5038                             ff_mpeg4_init_partitions(s);
5039                         }
5040                     }
5041
5042                     assert((put_bits_count(&s->pb)&7) == 0);
5043                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
5044
5045                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
5046                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
5047                         int d= 100 / s->avctx->error_rate;
5048                         if(r % d == 0){
5049                             current_packet_size=0;
5050 #ifndef ALT_BITSTREAM_WRITER
5051                             s->pb.buf_ptr= s->ptr_lastgob;
5052 #endif
5053                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
5054                         }
5055                     }
5056
5057                     if (s->avctx->rtp_callback){
5058                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
5059                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
5060                     }
5061
5062                     switch(s->codec_id){
5063                     case CODEC_ID_MPEG4:
5064                         ff_mpeg4_encode_video_packet_header(s);
5065                         ff_mpeg4_clean_buffers(s);
5066                     break;
5067                     case CODEC_ID_MPEG1VIDEO:
5068                     case CODEC_ID_MPEG2VIDEO:
5069                         ff_mpeg1_encode_slice_header(s);
5070                         ff_mpeg1_clean_buffers(s);
5071                     break;
5072                     case CODEC_ID_H263:
5073                     case CODEC_ID_H263P:
5074                         h263_encode_gob_header(s, mb_y);
5075                     break;
5076                     }
5077
5078                     if(s->flags&CODEC_FLAG_PASS1){
5079                         int bits= put_bits_count(&s->pb);
5080                         s->misc_bits+= bits - s->last_bits;
5081                         s->last_bits= bits;
5082                     }
5083
5084                     s->ptr_lastgob += current_packet_size;
5085                     s->first_slice_line=1;
5086                     s->resync_mb_x=mb_x;
5087                     s->resync_mb_y=mb_y;
5088                 }
5089             }
5090
5091             if(  (s->resync_mb_x   == s->mb_x)
5092                && s->resync_mb_y+1 == s->mb_y){
5093                 s->first_slice_line=0;
5094             }
5095
5096             s->mb_skipped=0;
5097             s->dquant=0; //only for QP_RD
5098
5099             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5100                 int next_block=0;
5101                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5102
5103                 copy_context_before_encode(&backup_s, s, -1);
5104                 backup_s.pb= s->pb;
5105                 best_s.data_partitioning= s->data_partitioning;
5106                 best_s.partitioned_frame= s->partitioned_frame;
5107                 if(s->data_partitioning){
5108                     backup_s.pb2= s->pb2;
5109                     backup_s.tex_pb= s->tex_pb;
5110                 }
5111
5112                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5113                     s->mv_dir = MV_DIR_FORWARD;
5114                     s->mv_type = MV_TYPE_16X16;
5115                     s->mb_intra= 0;
5116                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5117                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5118                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5119                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5120                 }
5121                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5122                     s->mv_dir = MV_DIR_FORWARD;
5123                     s->mv_type = MV_TYPE_FIELD;
5124                     s->mb_intra= 0;
5125                     for(i=0; i<2; i++){
5126                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5127                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5128                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5129                     }
5130                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5131                                  &dmin, &next_block, 0, 0);
5132                 }
5133                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5134                     s->mv_dir = MV_DIR_FORWARD;
5135                     s->mv_type = MV_TYPE_16X16;
5136                     s->mb_intra= 0;
5137                     s->mv[0][0][0] = 0;
5138                     s->mv[0][0][1] = 0;
5139                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5140                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5141                 }
5142                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5143                     s->mv_dir = MV_DIR_FORWARD;
5144                     s->mv_type = MV_TYPE_8X8;
5145                     s->mb_intra= 0;
5146                     for(i=0; i<4; i++){
5147                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5148                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5149                     }
5150                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5151                                  &dmin, &next_block, 0, 0);
5152                 }
5153                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5154                     s->mv_dir = MV_DIR_FORWARD;
5155                     s->mv_type = MV_TYPE_16X16;
5156                     s->mb_intra= 0;
5157                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5158                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5159                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5160                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5161                 }
5162                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5163                     s->mv_dir = MV_DIR_BACKWARD;
5164                     s->mv_type = MV_TYPE_16X16;
5165                     s->mb_intra= 0;
5166                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5167                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5168                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5169                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5170                 }
5171                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5172                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5173                     s->mv_type = MV_TYPE_16X16;
5174                     s->mb_intra= 0;
5175                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5176                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5177                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5178                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5179                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5180                                  &dmin, &next_block, 0, 0);
5181                 }
5182                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5183                     s->mv_dir = MV_DIR_FORWARD;
5184                     s->mv_type = MV_TYPE_FIELD;
5185                     s->mb_intra= 0;
5186                     for(i=0; i<2; i++){
5187                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5188                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5189                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5190                     }
5191                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5192                                  &dmin, &next_block, 0, 0);
5193                 }
5194                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5195                     s->mv_dir = MV_DIR_BACKWARD;
5196                     s->mv_type = MV_TYPE_FIELD;
5197                     s->mb_intra= 0;
5198                     for(i=0; i<2; i++){
5199                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5200                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5201                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5202                     }
5203                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5204                                  &dmin, &next_block, 0, 0);
5205                 }
5206                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5207                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5208                     s->mv_type = MV_TYPE_FIELD;
5209                     s->mb_intra= 0;
5210                     for(dir=0; dir<2; dir++){
5211                         for(i=0; i<2; i++){
5212                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5213                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5214                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5215                         }
5216                     }
5217                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5218                                  &dmin, &next_block, 0, 0);
5219                 }
5220                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5221                     s->mv_dir = 0;
5222                     s->mv_type = MV_TYPE_16X16;
5223                     s->mb_intra= 1;
5224                     s->mv[0][0][0] = 0;
5225                     s->mv[0][0][1] = 0;
5226                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5227                                  &dmin, &next_block, 0, 0);
5228                     if(s->h263_pred || s->h263_aic){
5229                         if(best_s.mb_intra)
5230                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5231                         else
5232                             ff_clean_intra_table_entries(s); //old mode?
5233                     }
5234                 }
5235
5236                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
5237                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
5238                         const int last_qp= backup_s.qscale;
5239                         int qpi, qp, dc[6];
5240                         DCTELEM ac[6][16];
5241                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5242                         static const int dquant_tab[4]={-1,1,-2,2};
5243
5244                         assert(backup_s.dquant == 0);
5245
5246                         //FIXME intra
5247                         s->mv_dir= best_s.mv_dir;
5248                         s->mv_type = MV_TYPE_16X16;
5249                         s->mb_intra= best_s.mb_intra;
5250                         s->mv[0][0][0] = best_s.mv[0][0][0];
5251                         s->mv[0][0][1] = best_s.mv[0][0][1];
5252                         s->mv[1][0][0] = best_s.mv[1][0][0];
5253                         s->mv[1][0][1] = best_s.mv[1][0][1];
5254
5255                         qpi = s->pict_type == B_TYPE ? 2 : 0;
5256                         for(; qpi<4; qpi++){
5257                             int dquant= dquant_tab[qpi];
5258                             qp= last_qp + dquant;
5259                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5260                                 continue;
5261                             backup_s.dquant= dquant;
5262                             if(s->mb_intra && s->dc_val[0]){
5263                                 for(i=0; i<6; i++){
5264                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5265                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5266                                 }
5267                             }
5268
5269                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5270                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5271                             if(best_s.qscale != qp){
5272                                 if(s->mb_intra && s->dc_val[0]){
5273                                     for(i=0; i<6; i++){
5274                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5275                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5276                                     }
5277                                 }
5278                             }
5279                         }
5280                     }
5281                 }
5282                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5283                     int mx= s->b_direct_mv_table[xy][0];
5284                     int my= s->b_direct_mv_table[xy][1];
5285
5286                     backup_s.dquant = 0;
5287                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5288                     s->mb_intra= 0;
5289                     ff_mpeg4_set_direct_mv(s, mx, my);
5290                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5291                                  &dmin, &next_block, mx, my);
5292                 }
5293                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT0){
5294                     backup_s.dquant = 0;
5295                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5296                     s->mb_intra= 0;
5297                     ff_mpeg4_set_direct_mv(s, 0, 0);
5298                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5299                                  &dmin, &next_block, 0, 0);
5300                 }
5301                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
5302                     int coded=0;
5303                     for(i=0; i<6; i++)
5304                         coded |= s->block_last_index[i];
5305                     if(coded){
5306                         int mx,my;
5307                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
5308                         if(best_s.mv_dir & MV_DIRECT){
5309                             mx=my=0; //FIXME find the one we actually used
5310                             ff_mpeg4_set_direct_mv(s, mx, my);
5311                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
5312                             mx= s->mv[1][0][0];
5313                             my= s->mv[1][0][1];
5314                         }else{
5315                             mx= s->mv[0][0][0];
5316                             my= s->mv[0][0][1];
5317                         }
5318
5319                         s->mv_dir= best_s.mv_dir;
5320                         s->mv_type = best_s.mv_type;
5321                         s->mb_intra= 0;
5322 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
5323                         s->mv[0][0][1] = best_s.mv[0][0][1];
5324                         s->mv[1][0][0] = best_s.mv[1][0][0];
5325                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
5326                         backup_s.dquant= 0;
5327                         s->skipdct=1;
5328                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5329                                         &dmin, &next_block, mx, my);
5330                         s->skipdct=0;
5331                     }
5332                 }
5333
5334                 s->current_picture.qscale_table[xy]= best_s.qscale;
5335
5336                 copy_context_after_encode(s, &best_s, -1);
5337
5338                 pb_bits_count= put_bits_count(&s->pb);
5339                 flush_put_bits(&s->pb);
5340                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5341                 s->pb= backup_s.pb;
5342
5343                 if(s->data_partitioning){
5344                     pb2_bits_count= put_bits_count(&s->pb2);
5345                     flush_put_bits(&s->pb2);
5346                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5347                     s->pb2= backup_s.pb2;
5348
5349                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5350                     flush_put_bits(&s->tex_pb);
5351                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5352                     s->tex_pb= backup_s.tex_pb;
5353                 }
5354                 s->last_bits= put_bits_count(&s->pb);
5355
5356                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5357                     ff_h263_update_motion_val(s);
5358
5359                 if(next_block==0){ //FIXME 16 vs linesize16
5360                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5361                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5362                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5363                 }
5364
5365                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5366                     MPV_decode_mb(s, s->block);
5367             } else {
5368                 int motion_x = 0, motion_y = 0;
5369                 s->mv_type=MV_TYPE_16X16;
5370                 // only one MB-Type possible
5371
5372                 switch(mb_type){
5373                 case CANDIDATE_MB_TYPE_INTRA:
5374                     s->mv_dir = 0;
5375                     s->mb_intra= 1;
5376                     motion_x= s->mv[0][0][0] = 0;
5377                     motion_y= s->mv[0][0][1] = 0;
5378                     break;
5379                 case CANDIDATE_MB_TYPE_INTER:
5380                     s->mv_dir = MV_DIR_FORWARD;
5381                     s->mb_intra= 0;
5382                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5383                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5384                     break;
5385                 case CANDIDATE_MB_TYPE_INTER_I:
5386                     s->mv_dir = MV_DIR_FORWARD;
5387                     s->mv_type = MV_TYPE_FIELD;
5388                     s->mb_intra= 0;
5389                     for(i=0; i<2; i++){
5390                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5391                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5392                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5393                     }
5394                     break;
5395                 case CANDIDATE_MB_TYPE_INTER4V:
5396                     s->mv_dir = MV_DIR_FORWARD;
5397                     s->mv_type = MV_TYPE_8X8;
5398                     s->mb_intra= 0;
5399                     for(i=0; i<4; i++){
5400                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5401                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5402                     }
5403                     break;
5404                 case CANDIDATE_MB_TYPE_DIRECT:
5405                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5406                     s->mb_intra= 0;
5407                     motion_x=s->b_direct_mv_table[xy][0];
5408                     motion_y=s->b_direct_mv_table[xy][1];
5409                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5410                     break;
5411                 case CANDIDATE_MB_TYPE_DIRECT0:
5412                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5413                     s->mb_intra= 0;
5414                     ff_mpeg4_set_direct_mv(s, 0, 0);
5415                     break;
5416                 case CANDIDATE_MB_TYPE_BIDIR:
5417                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5418                     s->mb_intra= 0;
5419                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5420                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5421                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5422                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5423                     break;
5424                 case CANDIDATE_MB_TYPE_BACKWARD:
5425                     s->mv_dir = MV_DIR_BACKWARD;
5426                     s->mb_intra= 0;
5427                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5428                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5429                     break;
5430                 case CANDIDATE_MB_TYPE_FORWARD:
5431                     s->mv_dir = MV_DIR_FORWARD;
5432                     s->mb_intra= 0;
5433                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5434                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5435 //                    printf(" %d %d ", motion_x, motion_y);
5436                     break;
5437                 case CANDIDATE_MB_TYPE_FORWARD_I:
5438                     s->mv_dir = MV_DIR_FORWARD;
5439                     s->mv_type = MV_TYPE_FIELD;
5440                     s->mb_intra= 0;
5441                     for(i=0; i<2; i++){
5442                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5443                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5444                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5445                     }
5446                     break;
5447                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5448                     s->mv_dir = MV_DIR_BACKWARD;
5449                     s->mv_type = MV_TYPE_FIELD;
5450                     s->mb_intra= 0;
5451                     for(i=0; i<2; i++){
5452                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5453                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5454                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5455                     }
5456                     break;
5457                 case CANDIDATE_MB_TYPE_BIDIR_I:
5458                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5459                     s->mv_type = MV_TYPE_FIELD;
5460                     s->mb_intra= 0;
5461                     for(dir=0; dir<2; dir++){
5462                         for(i=0; i<2; i++){
5463                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5464                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5465                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5466                         }
5467                     }
5468                     break;
5469                 default:
5470                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5471                 }
5472
5473                 encode_mb(s, motion_x, motion_y);
5474
5475                 // RAL: Update last macroblock type
5476                 s->last_mv_dir = s->mv_dir;
5477
5478                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5479                     ff_h263_update_motion_val(s);
5480
5481                 MPV_decode_mb(s, s->block);
5482             }
5483
5484             /* clean the MV table in IPS frames for direct mode in B frames */
5485             if(s->mb_intra /* && I,P,S_TYPE */){
5486                 s->p_mv_table[xy][0]=0;
5487                 s->p_mv_table[xy][1]=0;
5488             }
5489
5490             if(s->flags&CODEC_FLAG_PSNR){
5491                 int w= 16;
5492                 int h= 16;
5493
5494                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5495                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5496
5497                 s->current_picture.error[0] += sse(
5498                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5499                     s->dest[0], w, h, s->linesize);
5500                 s->current_picture.error[1] += sse(
5501                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5502                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5503                 s->current_picture.error[2] += sse(
5504                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5505                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5506             }
5507             if(s->loop_filter){
5508                 if(s->out_format == FMT_H263)
5509                     ff_h263_loop_filter(s);
5510             }
5511 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5512         }
5513     }
5514
5515     //not beautiful here but we must write it before flushing so it has to be here
5516     if (ENABLE_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5517         msmpeg4_encode_ext_header(s);
5518
5519     write_slice_end(s);
5520
5521     /* Send the last GOB if RTP */
5522     if (s->avctx->rtp_callback) {
5523         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5524         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5525         /* Call the RTP callback to send the last GOB */
5526         emms_c();
5527         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5528     }
5529
5530     return 0;
5531 }
5532
5533 #define MERGE(field) dst->field += src->field; src->field=0
5534 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5535     MERGE(me.scene_change_score);
5536     MERGE(me.mc_mb_var_sum_temp);
5537     MERGE(me.mb_var_sum_temp);
5538 }
5539
5540 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5541     int i;
5542
5543     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5544     MERGE(dct_count[1]);
5545     MERGE(mv_bits);
5546     MERGE(i_tex_bits);
5547     MERGE(p_tex_bits);
5548     MERGE(i_count);
5549     MERGE(f_count);
5550     MERGE(b_count);
5551     MERGE(skip_count);
5552     MERGE(misc_bits);
5553     MERGE(error_count);
5554     MERGE(padding_bug_score);
5555     MERGE(current_picture.error[0]);
5556     MERGE(current_picture.error[1]);
5557     MERGE(current_picture.error[2]);
5558
5559     if(dst->avctx->noise_reduction){
5560         for(i=0; i<64; i++){
5561             MERGE(dct_error_sum[0][i]);
5562             MERGE(dct_error_sum[1][i]);
5563         }
5564     }
5565
5566     assert(put_bits_count(&src->pb) % 8 ==0);
5567     assert(put_bits_count(&dst->pb) % 8 ==0);
5568     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5569     flush_put_bits(&dst->pb);
5570 }
5571
5572 static int estimate_qp(MpegEncContext *s, int dry_run){
5573     if (s->next_lambda){
5574         s->current_picture_ptr->quality=
5575         s->current_picture.quality = s->next_lambda;
5576         if(!dry_run) s->next_lambda= 0;
5577     } else if (!s->fixed_qscale) {
5578         s->current_picture_ptr->quality=
5579         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5580         if (s->current_picture.quality < 0)
5581             return -1;
5582     }
5583
5584     if(s->adaptive_quant){
5585         switch(s->codec_id){
5586         case CODEC_ID_MPEG4:
5587             ff_clean_mpeg4_qscales(s);
5588             break;
5589         case CODEC_ID_H263:
5590         case CODEC_ID_H263P:
5591         case CODEC_ID_FLV1:
5592             ff_clean_h263_qscales(s);
5593             break;
5594         }
5595
5596         s->lambda= s->lambda_table[0];
5597         //FIXME broken
5598     }else
5599         s->lambda= s->current_picture.quality;
5600 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5601     update_qscale(s);
5602     return 0;
5603 }
5604
/**
 * Encode one complete picture: run motion estimation, choose fcodes and
 * the final qscale, write the picture header, then run the per-slice
 * encode threads and merge their results.
 *
 * @param picture_number display/coding number used in the headers
 * @return 0 on success, -1 if rate control fails (see estimate_qp())
 */
static int encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration

    /* flip-flop rounding: alternated on non-B frames for codecs that
       support it, to avoid drift accumulating in one direction */
    if(s->pict_type==I_TYPE){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=B_TYPE){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    /* pre-ME quality estimate: 2nd pass reads it from the stats file,
       otherwise reuse the lambda of the previous frame of the same kind */
    if(s->flags & CODEC_FLAG_PASS2){
        if (estimate_qp(s,1) < 0)
            return -1;
        ff_get_2pass_fcode(s);
    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
        if(s->pict_type==B_TYPE)
            s->lambda= s->last_lambda_for[s->pict_type];
        else
            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
        update_qscale(s);
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    for(i=1; i<s->avctx->thread_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    ff_init_me(s);

    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    }else /* if(s->pict_type == I_TYPE) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
        }
    }
    /* collect per-thread ME statistics into the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* promote a P frame to I if ME reports a scene change */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* choose f_code/b_code from the MV ranges and clip over-long MVs
       (skipped for H.263+ unrestricted MV mode) */
    if(!s->umvplus){
        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
            }

            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==B_TYPE){
            int a, b;

            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    /* final quality decision, now that ME statistics are available */
    if (estimate_qp(s, 0) < 0)
        return -1;

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
        }
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture_ptr->key_frame=
    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
    s->current_picture_ptr->pict_type=
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    /* write the picture header of the output format; header_bits is the
       bit count between these two put_bits_count() snapshots */
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        if (ENABLE_MJPEG_ENCODER)
        mjpeg_picture_header(s);
        break;
#ifdef CONFIG_H261_ENCODER
    case FMT_H261:
        ff_h261_encode_picture_header(s, picture_number);
        break;
#endif
    case FMT_H263:
        if (ENABLE_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (ENABLE_MSMPEG4_ENCODER && s->h263_msmpeg4)
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
#ifdef CONFIG_RV10_ENCODER
        else if (s->codec_id == CODEC_ID_RV10)
            rv10_encode_picture_header(s, picture_number);
#endif
#ifdef CONFIG_RV20_ENCODER
        else if (s->codec_id == CODEC_ID_RV20)
            rv20_encode_picture_header(s, picture_number);
#endif
        else if (s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    /* run the slice encoders in parallel and merge their statistics
       and bitstreams back into the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
    return 0;
}
5821
5822 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5823     const int intra= s->mb_intra;
5824     int i;
5825
5826     s->dct_count[intra]++;
5827
5828     for(i=0; i<64; i++){
5829         int level= block[i];
5830
5831         if(level){
5832             if(level>0){
5833                 s->dct_error_sum[intra][i] += level;
5834                 level -= s->dct_offset[intra][i];
5835                 if(level<0) level=0;
5836             }else{
5837                 s->dct_error_sum[intra][i] -= level;
5838                 level += s->dct_offset[intra][i];
5839                 if(level>0) level=0;
5840             }
5841             block[i]= level;
5842         }
5843     }
5844 }
5845
5846 static int dct_quantize_trellis_c(MpegEncContext *s,
5847                         DCTELEM *block, int n,
5848                         int qscale, int *overflow){
5849     const int *qmat;
5850     const uint8_t *scantable= s->intra_scantable.scantable;
5851     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5852     int max=0;
5853     unsigned int threshold1, threshold2;
5854     int bias=0;
5855     int run_tab[65];
5856     int level_tab[65];
5857     int score_tab[65];
5858     int survivor[65];
5859     int survivor_count;
5860     int last_run=0;
5861     int last_level=0;
5862     int last_score= 0;
5863     int last_i;
5864     int coeff[2][64];
5865     int coeff_count[64];
5866     int qmul, qadd, start_i, last_non_zero, i, dc;
5867     const int esc_length= s->ac_esc_length;
5868     uint8_t * length;
5869     uint8_t * last_length;
5870     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5871
5872     s->dsp.fdct (block);
5873
5874     if(s->dct_error_sum)
5875         s->denoise_dct(s, block);
5876     qmul= qscale*16;
5877     qadd= ((qscale-1)|1)*8;
5878
5879     if (s->mb_intra) {
5880         int q;
5881         if (!s->h263_aic) {
5882             if (n < 4)
5883                 q = s->y_dc_scale;
5884             else
5885                 q = s->c_dc_scale;
5886             q = q << 3;
5887         } else{
5888             /* For AIC we skip quant/dequant of INTRADC */
5889             q = 1 << 3;
5890             qadd=0;
5891         }
5892
5893         /* note: block[0] is assumed to be positive */
5894         block[0] = (block[0] + (q >> 1)) / q;
5895         start_i = 1;
5896         last_non_zero = 0;
5897         qmat = s->q_intra_matrix[qscale];
5898         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5899             bias= 1<<(QMAT_SHIFT-1);
5900         length     = s->intra_ac_vlc_length;
5901         last_length= s->intra_ac_vlc_last_length;
5902     } else {
5903         start_i = 0;
5904         last_non_zero = -1;
5905         qmat = s->q_inter_matrix[qscale];
5906         length     = s->inter_ac_vlc_length;
5907         last_length= s->inter_ac_vlc_last_length;
5908     }
5909     last_i= start_i;
5910
5911     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5912     threshold2= (threshold1<<1);
5913
5914     for(i=63; i>=start_i; i--) {
5915         const int j = scantable[i];
5916         int level = block[j] * qmat[j];
5917
5918         if(((unsigned)(level+threshold1))>threshold2){
5919             last_non_zero = i;
5920             break;
5921         }
5922     }
5923
5924     for(i=start_i; i<=last_non_zero; i++) {
5925         const int j = scantable[i];
5926         int level = block[j] * qmat[j];
5927
5928 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5929 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5930         if(((unsigned)(level+threshold1))>threshold2){
5931             if(level>0){
5932                 level= (bias + level)>>QMAT_SHIFT;
5933                 coeff[0][i]= level;
5934                 coeff[1][i]= level-1;
5935 //                coeff[2][k]= level-2;
5936             }else{
5937                 level= (bias - level)>>QMAT_SHIFT;
5938                 coeff[0][i]= -level;
5939                 coeff[1][i]= -level+1;
5940 //                coeff[2][k]= -level+2;
5941             }
5942             coeff_count[i]= FFMIN(level, 2);
5943             assert(coeff_count[i]);
5944             max |=level;
5945         }else{
5946             coeff[0][i]= (level>>31)|1;
5947             coeff_count[i]= 1;
5948         }
5949     }
5950
5951     *overflow= s->max_qcoeff < max; //overflow might have happened
5952
5953     if(last_non_zero < start_i){
5954         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5955         return last_non_zero;
5956     }
5957
5958     score_tab[start_i]= 0;
5959     survivor[0]= start_i;
5960     survivor_count= 1;
5961
5962     for(i=start_i; i<=last_non_zero; i++){
5963         int level_index, j;
5964         const int dct_coeff= FFABS(block[ scantable[i] ]);
5965         const int zero_distoration= dct_coeff*dct_coeff;
5966         int best_score=256*256*256*120;
5967         for(level_index=0; level_index < coeff_count[i]; level_index++){
5968             int distoration;
5969             int level= coeff[level_index][i];
5970             const int alevel= FFABS(level);
5971             int unquant_coeff;
5972
5973             assert(level);
5974
5975             if(s->out_format == FMT_H263){
5976                 unquant_coeff= alevel*qmul + qadd;
5977             }else{ //MPEG1
5978                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5979                 if(s->mb_intra){
5980                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5981                         unquant_coeff =   (unquant_coeff - 1) | 1;
5982                 }else{
5983                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5984                         unquant_coeff =   (unquant_coeff - 1) | 1;
5985                 }
5986                 unquant_coeff<<= 3;
5987             }
5988
5989             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
5990             level+=64;
5991             if((level&(~127)) == 0){
5992                 for(j=survivor_count-1; j>=0; j--){
5993                     int run= i - survivor[j];
5994                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5995                     score += score_tab[i-run];
5996
5997                     if(score < best_score){
5998                         best_score= score;
5999                         run_tab[i+1]= run;
6000                         level_tab[i+1]= level-64;
6001                     }
6002                 }
6003
6004                 if(s->out_format == FMT_H263){
6005                     for(j=survivor_count-1; j>=0; j--){
6006                         int run= i - survivor[j];
6007                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
6008                         score += score_tab[i-run];
6009                         if(score < last_score){
6010                             last_score= score;
6011                             last_run= run;
6012                             last_level= level-64;
6013                             last_i= i+1;
6014                         }
6015                     }
6016                 }
6017             }else{
6018                 distoration += esc_length*lambda;
6019                 for(j=survivor_count-1; j>=0; j--){
6020                     int run= i - survivor[j];
6021                     int score= distoration + score_tab[i-run];
6022
6023                     if(score < best_score){
6024                         best_score= score;
6025                         run_tab[i+1]= run;
6026                         level_tab[i+1]= level-64;
6027                     }
6028                 }
6029
6030                 if(s->out_format == FMT_H263){
6031                   for(j=survivor_count-1; j>=0; j--){
6032                         int run= i - survivor[j];
6033                         int score= distoration + score_tab[i-run];
6034                         if(score < last_score){
6035                             last_score= score;
6036                             last_run= run;
6037                             last_level= level-64;
6038                             last_i= i+1;
6039                         }
6040                     }
6041                 }
6042             }
6043         }
6044
6045         score_tab[i+1]= best_score;
6046
6047         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
6048         if(last_non_zero <= 27){
6049             for(; survivor_count; survivor_count--){
6050                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
6051                     break;
6052             }
6053         }else{
6054             for(; survivor_count; survivor_count--){
6055                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
6056                     break;
6057             }
6058         }
6059
6060         survivor[ survivor_count++ ]= i+1;
6061     }
6062
6063     if(s->out_format != FMT_H263){
6064         last_score= 256*256*256*120;
6065         for(i= survivor[0]; i<=last_non_zero + 1; i++){
6066             int score= score_tab[i];
6067             if(i) score += lambda*2; //FIXME exacter?
6068
6069             if(score < last_score){
6070                 last_score= score;
6071                 last_i= i;
6072                 last_level= level_tab[i];
6073                 last_run= run_tab[i];
6074             }
6075         }
6076     }
6077
6078     s->coded_score[n] = last_score;
6079
6080     dc= FFABS(block[0]);
6081     last_non_zero= last_i - 1;
6082     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
6083
6084     if(last_non_zero < start_i)
6085         return last_non_zero;
6086
6087     if(last_non_zero == 0 && start_i == 0){
6088         int best_level= 0;
6089         int best_score= dc * dc;
6090
6091         for(i=0; i<coeff_count[0]; i++){
6092             int level= coeff[i][0];
6093             int alevel= FFABS(level);
6094             int unquant_coeff, score, distortion;
6095
6096             if(s->out_format == FMT_H263){
6097                     unquant_coeff= (alevel*qmul + qadd)>>3;
6098             }else{ //MPEG1
6099                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
6100                     unquant_coeff =   (unquant_coeff - 1) | 1;
6101             }
6102             unquant_coeff = (unquant_coeff + 4) >> 3;
6103             unquant_coeff<<= 3 + 3;
6104
6105             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
6106             level+=64;
6107             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
6108             else                    score= distortion + esc_length*lambda;
6109
6110             if(score < best_score){
6111                 best_score= score;
6112                 best_level= level - 64;
6113             }
6114         }
6115         block[0]= best_level;
6116         s->coded_score[n] = best_score - dc*dc;
6117         if(best_level == 0) return -1;
6118         else                return last_non_zero;
6119     }
6120
6121     i= last_i;
6122     assert(last_level);
6123
6124     block[ perm_scantable[last_non_zero] ]= last_level;
6125     i -= last_run + 1;
6126
6127     for(; i>start_i; i -= run_tab[i] + 1){
6128         block[ perm_scantable[i-1] ]= level_tab[i];
6129     }
6130
6131     return last_non_zero;
6132 }
6133
//#define REFINE_STATS 1
/* The 64 8x8 DCT basis vectors in BASIS_SHIFT fixed point, indexed by the
 * IDCT-permuted coefficient position.  Built lazily by build_basis() on
 * first use (dct_quantize_refine checks basis[0][0] == 0). */
static int16_t basis[64][64];
6136
6137 static void build_basis(uint8_t *perm){
6138     int i, j, x, y;
6139     emms_c();
6140     for(i=0; i<8; i++){
6141         for(j=0; j<8; j++){
6142             for(y=0; y<8; y++){
6143                 for(x=0; x<8; x++){
6144                     double s= 0.25*(1<<BASIS_SHIFT);
6145                     int index= 8*i + j;
6146                     int perm_index= perm[index];
6147                     if(i==0) s*= sqrt(0.5);
6148                     if(j==0) s*= sqrt(0.5);
6149                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6150                 }
6151             }
6152         }
6153     }
6154 }
6155
/**
 * Iteratively refine an already-quantized block: each pass tries a +-1
 * change on every coefficient, scores it with try_8x8basis() (weighted
 * distortion) plus a lambda-scaled VLC-length delta, applies the single
 * best change, and repeats until no change improves the score.
 * Returns the new index of the last nonzero coefficient.
 */
static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
                        DCTELEM *block, int16_t *weight, DCTELEM *orig,
                        int n, int qscale){
    int16_t rem[64];   /* current reconstruction error in RECON_SHIFT fixed point */
    DECLARE_ALIGNED_16(DCTELEM, d1[64]);
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
//    unsigned int threshold1, threshold2;
//    int bias=0;
    int run_tab[65];   /* zero-run length preceding each coded level, in scan order */
    int prev_run=0;
    int prev_level=0;
    int qmul, qadd, start_i, last_non_zero, i, dc;
    uint8_t * length;
    uint8_t * last_length;
    int lambda;
    int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
#ifdef REFINE_STATS
static int count=0;
static int after_last=0;
static int to_zero=0;
static int from_zero=0;
static int raise=0;
static int lower=0;
static int messed_sign=0;
#endif

    /* lazily build the DCT basis table on first use */
    if(basis[0][0] == 0)
        build_basis(s->dsp.idct_permutation);

    qmul= qscale*2;
    qadd= (qscale-1)|1;
    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1;
            qadd=0;
        }
        q <<= RECON_SHIFT-3;
        /* note: block[0] is assumed to be positive */
        dc= block[0]*q;
//        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        qmat = s->q_intra_matrix[qscale];
//        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
//            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        dc= 0;
        start_i = 0;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_non_zero = s->block_last_index[n];

#ifdef REFINE_STATS
{START_TIMER
#endif
    /* initialize rem[] with (rounded DC reconstruction) minus the original
       spatial-domain samples, in RECON_SHIFT fixed point */
    dc += (1<<(RECON_SHIFT-1));
    for(i=0; i<64; i++){
        rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
    }
#ifdef REFINE_STATS
STOP_TIMER("memset rem[]")}
#endif
    /* map the raw noise-shaping weights into 16..63 and derive lambda
       from their summed squared energy */
    sum=0;
    for(i=0; i<64; i++){
        int one= 36;
        int qns=4;
        int w;

        w= FFABS(weight[i]) + qns*one;
        w= 15 + (48*qns*one + w/2)/w; // 16 .. 63

        weight[i] = w;
//        w=weight[i] = (63*qns + (w/2)) / w;

        assert(w>0);
        assert(w<(1<<6));
        sum += w*w;
    }
    lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
#ifdef REFINE_STATS
{START_TIMER
#endif
    /* build the initial run-length table and fold the dequantized
       coefficients into rem[], so rem[] holds the current coding error */
    run=0;
    rle_index=0;
    for(i=start_i; i<=last_non_zero; i++){
        int j= perm_scantable[i];
        const int level= block[j];
        int coeff;

        if(level){
            if(level<0) coeff= qmul*level - qadd;
            else        coeff= qmul*level + qadd;
            run_tab[rle_index++]=run;
            run=0;

            s->dsp.add_8x8basis(rem, basis[j], coeff);
        }else{
            run++;
        }
    }
#ifdef REFINE_STATS
if(last_non_zero>0){
STOP_TIMER("init rem[]")
}
}

{START_TIMER
#endif
    /* greedy search: per pass pick the single best +-1 change; stop when
       no candidate beats the do-nothing score */
    for(;;){
        int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
        int best_coeff=0;
        int best_change=0;
        int run2, best_unquant_change=0, analyze_gradient;
#ifdef REFINE_STATS
{START_TIMER
#endif
        analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;

        if(analyze_gradient){
#ifdef REFINE_STATS
{START_TIMER
#endif
            /* forward-DCT the weighted residual into d1[]; used below to
               reject sign-inconsistent zero->+-1 candidates early */
            for(i=0; i<64; i++){
                int w= weight[i];

                d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
            }
#ifdef REFINE_STATS
STOP_TIMER("rem*w*w")}
{START_TIMER
#endif
            s->dsp.fdct(d1);
#ifdef REFINE_STATS
STOP_TIMER("dct")}
#endif
        }

        if(start_i){
            /* intra: also try +-1 on the DC coefficient; only the
               distortion term is scored here (no VLC length delta) */
            const int level= block[0];
            int change, old_coeff;

            assert(s->mb_intra);

            old_coeff= q*level;

            for(change=-1; change<=1; change+=2){
                int new_level= level + change;
                int score, new_coeff;

                new_coeff= q*new_level;
                if(new_coeff >= 2048 || new_coeff < 0)
                    continue;

                score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
                if(score<best_score){
                    best_score= score;
                    best_coeff= 0;
                    best_change= change;
                    best_unquant_change= new_coeff - old_coeff;
                }
            }
        }

        run=0;
        rle_index=0;
        run2= run_tab[rle_index++];
        prev_level=0;
        prev_run=0;

        for(i=start_i; i<64; i++){
            int j= perm_scantable[i];
            const int level= block[j];
            int change, old_coeff;

            /* unless aggressive noise shaping is on, never look past the
               position right after the current last nonzero coefficient */
            if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
                break;

            if(level){
                if(level<0) old_coeff= qmul*level - qadd;
                else        old_coeff= qmul*level + qadd;
                run2= run_tab[rle_index++]; //FIXME ! maybe after last
            }else{
                old_coeff=0;
                run2--;
                assert(run2>=0 || i >= last_non_zero );
            }

            for(change=-1; change<=1; change+=2){
                int new_level= level + change;
                int score, new_coeff, unquant_change;

                score=0;
                if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
                   continue;

                if(new_level){
                    if(new_level<0) new_coeff= qmul*new_level - qadd;
                    else            new_coeff= qmul*new_level + qadd;
                    if(new_coeff >= 2048 || new_coeff <= -2048)
                        continue;
                    //FIXME check for overflow

                    if(level){
                        /* nonzero -> nonzero: rate delta is just the VLC
                           length difference for the new level */
                        if(level < 63 && level > -63){
                            if(i < last_non_zero)
                                score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
                                         - length[UNI_AC_ENC_INDEX(run, level+64)];
                            else
                                score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
                                         - last_length[UNI_AC_ENC_INDEX(run, level+64)];
                        }
                    }else{
                        assert(FFABS(new_level)==1);

                        if(analyze_gradient){
                            /* skip if d1's sign at this position matches
                               new_level (candidate rejected as unhelpful) */
                            int g= d1[ scantable[i] ];
                            if(g && (g^new_level) >= 0)
                                continue;
                        }

                        if(i < last_non_zero){
                            /* zero -> +-1 inside the block: one run is
                               split into two codes */
                            int next_i= i + run2 + 1;
                            int next_level= block[ perm_scantable[next_i] ] + 64;

                            if(next_level&(~127))
                                next_level= 0;

                            if(next_i < last_non_zero)
                                score +=   length[UNI_AC_ENC_INDEX(run, 65)]
                                         + length[UNI_AC_ENC_INDEX(run2, next_level)]
                                         - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
                            else
                                score +=  length[UNI_AC_ENC_INDEX(run, 65)]
                                        + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
                                        - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
                        }else{
                            /* appending a new last coefficient: the old
                               last one is recoded with a non-last code */
                            score += last_length[UNI_AC_ENC_INDEX(run, 65)];
                            if(prev_level){
                                score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
                                        - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
                            }
                        }
                    }
                }else{
                    /* +-1 -> zero: two runs merge into one code */
                    new_coeff=0;
                    assert(FFABS(level)==1);

                    if(i < last_non_zero){
                        int next_i= i + run2 + 1;
                        int next_level= block[ perm_scantable[next_i] ] + 64;

                        if(next_level&(~127))
                            next_level= 0;

                        if(next_i < last_non_zero)
                            score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run2, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run, 65)];
                        else
                            score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
                                     - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run, 65)];
                    }else{
                        /* dropping the last coefficient: the previous
                           nonzero one becomes the new last */
                        score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
                        if(prev_level){
                            score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
                                    - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
                        }
                    }
                }

                score *= lambda;

                unquant_change= new_coeff - old_coeff;
                assert((score < 100*lambda && score > -100*lambda) || lambda==0);

                score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
                if(score<best_score){
                    best_score= score;
                    best_coeff= i;
                    best_change= change;
                    best_unquant_change= unquant_change;
                }
            }
            if(level){
                prev_level= level + 64;
                if(prev_level&(~127))
                    prev_level= 0;
                prev_run= run;
                run=0;
            }else{
                run++;
            }
        }
#ifdef REFINE_STATS
STOP_TIMER("iterative step")}
#endif

        if(best_change){
            /* apply the winning change and update the bookkeeping */
            int j= perm_scantable[ best_coeff ];

            block[j] += best_change;

            if(best_coeff > last_non_zero){
                last_non_zero= best_coeff;
                assert(block[j]);
#ifdef REFINE_STATS
after_last++;
#endif
            }else{
#ifdef REFINE_STATS
if(block[j]){
    if(block[j] - best_change){
        if(FFABS(block[j]) > FFABS(block[j] - best_change)){
            raise++;
        }else{
            lower++;
        }
    }else{
        from_zero++;
    }
}else{
    to_zero++;
}
#endif
                /* the change may have zeroed the old last coefficient:
                   walk back to the new last nonzero one */
                for(; last_non_zero>=start_i; last_non_zero--){
                    if(block[perm_scantable[last_non_zero]])
                        break;
                }
            }
#ifdef REFINE_STATS
count++;
if(256*256*256*64 % count == 0){
    printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
}
#endif
            /* rebuild the run-length table for the modified block */
            run=0;
            rle_index=0;
            for(i=start_i; i<=last_non_zero; i++){
                int j= perm_scantable[i];
                const int level= block[j];

                 if(level){
                     run_tab[rle_index++]=run;
                     run=0;
                 }else{
                     run++;
                 }
            }

            s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
        }else{
            break;
        }
    }
#ifdef REFINE_STATS
if(last_non_zero>0){
STOP_TIMER("iterative search")
}
}
#endif

    return last_non_zero;
}
6531
6532 static int dct_quantize_c(MpegEncContext *s,
6533                         DCTELEM *block, int n,
6534                         int qscale, int *overflow)
6535 {
6536     int i, j, level, last_non_zero, q, start_i;
6537     const int *qmat;
6538     const uint8_t *scantable= s->intra_scantable.scantable;
6539     int bias;
6540     int max=0;
6541     unsigned int threshold1, threshold2;
6542
6543     s->dsp.fdct (block);
6544
6545     if(s->dct_error_sum)
6546         s->denoise_dct(s, block);
6547
6548     if (s->mb_intra) {
6549         if (!s->h263_aic) {
6550             if (n < 4)
6551                 q = s->y_dc_scale;
6552             else
6553                 q = s->c_dc_scale;
6554             q = q << 3;
6555         } else
6556             /* For AIC we skip quant/dequant of INTRADC */
6557             q = 1 << 3;
6558
6559         /* note: block[0] is assumed to be positive */
6560         block[0] = (block[0] + (q >> 1)) / q;
6561         start_i = 1;
6562         last_non_zero = 0;
6563         qmat = s->q_intra_matrix[qscale];
6564         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6565     } else {
6566         start_i = 0;
6567         last_non_zero = -1;
6568         qmat = s->q_inter_matrix[qscale];
6569         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6570     }
6571     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6572     threshold2= (threshold1<<1);
6573     for(i=63;i>=start_i;i--) {
6574         j = scantable[i];
6575         level = block[j] * qmat[j];
6576
6577         if(((unsigned)(level+threshold1))>threshold2){
6578             last_non_zero = i;
6579             break;
6580         }else{
6581             block[j]=0;
6582         }
6583     }
6584     for(i=start_i; i<=last_non_zero; i++) {
6585         j = scantable[i];
6586         level = block[j] * qmat[j];
6587
6588 //        if(   bias+level >= (1<<QMAT_SHIFT)
6589 //           || bias-level >= (1<<QMAT_SHIFT)){
6590         if(((unsigned)(level+threshold1))>threshold2){
6591             if(level>0){
6592                 level= (bias + level)>>QMAT_SHIFT;
6593                 block[j]= level;
6594             }else{
6595                 level= (bias - level)>>QMAT_SHIFT;
6596                 block[j]= -level;
6597             }
6598             max |=level;
6599         }else{
6600             block[j]=0;
6601         }
6602     }
6603     *overflow= s->max_qcoeff < max; //overflow might have happened
6604
6605     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
6606     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6607         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6608
6609     return last_non_zero;
6610 }
6611
6612 #endif //CONFIG_ENCODERS
6613
6614 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6615                                    DCTELEM *block, int n, int qscale)
6616 {
6617     int i, level, nCoeffs;
6618     const uint16_t *quant_matrix;
6619
6620     nCoeffs= s->block_last_index[n];
6621
6622     if (n < 4)
6623         block[0] = block[0] * s->y_dc_scale;
6624     else
6625         block[0] = block[0] * s->c_dc_scale;
6626     /* XXX: only mpeg1 */
6627     quant_matrix = s->intra_matrix;
6628     for(i=1;i<=nCoeffs;i++) {
6629         int j= s->intra_scantable.permutated[i];
6630         level = block[j];
6631         if (level) {
6632             if (level < 0) {
6633                 level = -level;
6634                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6635                 level = (level - 1) | 1;
6636                 level = -level;
6637             } else {
6638                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6639                 level = (level - 1) | 1;
6640             }
6641             block[j] = level;
6642         }
6643     }
6644 }
6645
6646 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6647                                    DCTELEM *block, int n, int qscale)
6648 {
6649     int i, level, nCoeffs;
6650     const uint16_t *quant_matrix;
6651
6652     nCoeffs= s->block_last_index[n];
6653
6654     quant_matrix = s->inter_matrix;
6655     for(i=0; i<=nCoeffs; i++) {
6656         int j= s->intra_scantable.permutated[i];
6657         level = block[j];
6658         if (level) {
6659             if (level < 0) {
6660                 level = -level;
6661                 level = (((level << 1) + 1) * qscale *
6662                          ((int) (quant_matrix[j]))) >> 4;
6663                 level = (level - 1) | 1;
6664                 level = -level;
6665             } else {
6666                 level = (((level << 1) + 1) * qscale *
6667                          ((int) (quant_matrix[j]))) >> 4;
6668                 level = (level - 1) | 1;
6669             }
6670             block[j] = level;
6671         }
6672     }
6673 }
6674
6675 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6676                                    DCTELEM *block, int n, int qscale)
6677 {
6678     int i, level, nCoeffs;
6679     const uint16_t *quant_matrix;
6680
6681     if(s->alternate_scan) nCoeffs= 63;
6682     else nCoeffs= s->block_last_index[n];
6683
6684     if (n < 4)
6685         block[0] = block[0] * s->y_dc_scale;
6686     else
6687         block[0] = block[0] * s->c_dc_scale;
6688     quant_matrix = s->intra_matrix;
6689     for(i=1;i<=nCoeffs;i++) {
6690         int j= s->intra_scantable.permutated[i];
6691         level = block[j];
6692         if (level) {
6693             if (level < 0) {
6694                 level = -level;
6695                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6696                 level = -level;
6697             } else {
6698                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6699             }
6700             block[j] = level;
6701         }
6702     }
6703 }
6704
6705 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6706                                    DCTELEM *block, int n, int qscale)
6707 {
6708     int i, level, nCoeffs;
6709     const uint16_t *quant_matrix;
6710     int sum=-1;
6711
6712     if(s->alternate_scan) nCoeffs= 63;
6713     else nCoeffs= s->block_last_index[n];
6714
6715     if (n < 4)
6716         block[0] = block[0] * s->y_dc_scale;
6717     else
6718         block[0] = block[0] * s->c_dc_scale;
6719     quant_matrix = s->intra_matrix;
6720     for(i=1;i<=nCoeffs;i++) {
6721         int j= s->intra_scantable.permutated[i];
6722         level = block[j];
6723         if (level) {
6724             if (level < 0) {
6725                 level = -level;
6726                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6727                 level = -level;
6728             } else {
6729                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6730             }
6731             block[j] = level;
6732             sum+=level;
6733         }
6734     }
6735     block[63]^=sum&1;
6736 }
6737
6738 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6739                                    DCTELEM *block, int n, int qscale)
6740 {
6741     int i, level, nCoeffs;
6742     const uint16_t *quant_matrix;
6743     int sum=-1;
6744
6745     if(s->alternate_scan) nCoeffs= 63;
6746     else nCoeffs= s->block_last_index[n];
6747
6748     quant_matrix = s->inter_matrix;
6749     for(i=0; i<=nCoeffs; i++) {
6750         int j= s->intra_scantable.permutated[i];
6751         level = block[j];
6752         if (level) {
6753             if (level < 0) {
6754                 level = -level;
6755                 level = (((level << 1) + 1) * qscale *
6756                          ((int) (quant_matrix[j]))) >> 4;
6757                 level = -level;
6758             } else {
6759                 level = (((level << 1) + 1) * qscale *
6760                          ((int) (quant_matrix[j]))) >> 4;
6761             }
6762             block[j] = level;
6763             sum+=level;
6764         }
6765     }
6766     block[63]^=sum&1;
6767 }
6768
6769 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6770                                   DCTELEM *block, int n, int qscale)
6771 {
6772     int i, level, qmul, qadd;
6773     int nCoeffs;
6774
6775     assert(s->block_last_index[n]>=0);
6776
6777     qmul = qscale << 1;
6778
6779     if (!s->h263_aic) {
6780         if (n < 4)
6781             block[0] = block[0] * s->y_dc_scale;
6782         else
6783             block[0] = block[0] * s->c_dc_scale;
6784         qadd = (qscale - 1) | 1;
6785     }else{
6786         qadd = 0;
6787     }
6788     if(s->ac_pred)
6789         nCoeffs=63;
6790     else
6791         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6792
6793     for(i=1; i<=nCoeffs; i++) {
6794         level = block[i];
6795         if (level) {
6796             if (level < 0) {
6797                 level = level * qmul - qadd;
6798             } else {
6799                 level = level * qmul + qadd;
6800             }
6801             block[i] = level;
6802         }
6803     }
6804 }
6805
6806 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6807                                   DCTELEM *block, int n, int qscale)
6808 {
6809     int i, level, qmul, qadd;
6810     int nCoeffs;
6811
6812     assert(s->block_last_index[n]>=0);
6813
6814     qadd = (qscale - 1) | 1;
6815     qmul = qscale << 1;
6816
6817     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6818
6819     for(i=0; i<=nCoeffs; i++) {
6820         level = block[i];
6821         if (level) {
6822             if (level < 0) {
6823                 level = level * qmul - qadd;
6824             } else {
6825                 level = level * qmul + qadd;
6826             }
6827             block[i] = level;
6828         }
6829     }
6830 }
6831
6832 #ifdef CONFIG_ENCODERS
6833 AVCodec h263_encoder = {
6834     "h263",
6835     CODEC_TYPE_VIDEO,
6836     CODEC_ID_H263,
6837     sizeof(MpegEncContext),
6838     MPV_encode_init,
6839     MPV_encode_picture,
6840     MPV_encode_end,
6841     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6842 };
6843
6844 AVCodec h263p_encoder = {
6845     "h263p",
6846     CODEC_TYPE_VIDEO,
6847     CODEC_ID_H263P,
6848     sizeof(MpegEncContext),
6849     MPV_encode_init,
6850     MPV_encode_picture,
6851     MPV_encode_end,
6852     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6853 };
6854
/* Sorenson Spark (FLV1, the H.263 variant used in Flash Video) encoder
 * registration, backed by the shared MPV encoder core. */
AVCodec flv_encoder = {
    "flv",
    CODEC_TYPE_VIDEO,
    CODEC_ID_FLV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6865
/* RealVideo 1.0 encoder registration, backed by the shared MPV encoder
 * core; 4:2:0 planar YUV input only. */
AVCodec rv10_encoder = {
    "rv10",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV10,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6876
/* RealVideo 2.0 encoder registration, backed by the shared MPV encoder
 * core; 4:2:0 planar YUV input only. */
AVCodec rv20_encoder = {
    "rv20",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV20,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6887
/* MPEG-4 part 2 encoder registration. CODEC_CAP_DELAY marks that output
 * frames may lag input frames (e.g. B-frame reordering), so callers must
 * flush the encoder at end of stream. */
AVCodec mpeg4_encoder = {
    "mpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG4,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
    .capabilities= CODEC_CAP_DELAY,
};
6899
/* Microsoft MPEG-4 version 1 encoder registration, backed by the shared
 * MPV encoder core. */
AVCodec msmpeg4v1_encoder = {
    "msmpeg4v1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6910
/* Microsoft MPEG-4 version 2 encoder registration, backed by the shared
 * MPV encoder core. */
AVCodec msmpeg4v2_encoder = {
    "msmpeg4v2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6921
/* Microsoft MPEG-4 version 3 encoder registration. Note the short name is
 * "msmpeg4" (no version suffix) — v3 is the default/most common variant. */
AVCodec msmpeg4v3_encoder = {
    "msmpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V3,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6932
/* Windows Media Video 7 (WMV1) encoder registration, backed by the shared
 * MPV encoder core. */
AVCodec wmv1_encoder = {
    "wmv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6943
6944 #ifdef CONFIG_MJPEG_ENCODER
/* Motion JPEG encoder registration. Unlike the other MPV-based encoders
 * above it takes full-range ("J") pixel formats, in both 4:2:0 and 4:2:2. */
AVCodec mjpeg_encoder = {
    "mjpeg",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MJPEG,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, PIX_FMT_YUVJ422P, -1},
};
6955 #endif
6956
6957 #endif //CONFIG_ENCODERS