/*
 * (gitweb export header removed: git.sesse.net — ffmpeg / libavcodec / mpegvideo.c,
 *  revision "Add a bounds check on AVProbeData input.")
 */
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  *
22  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
23  */
24
25 /**
26  * @file mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "mjpegenc.h"
34 #include "msmpeg4.h"
35 #include "faandct.h"
36 #include <limits.h>
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
/* ---- forward declarations ---------------------------------------------- */

#ifdef CONFIG_ENCODERS
static int encode_picture(MpegEncContext *s, int picture_number);
#endif //CONFIG_ENCODERS
/* per-standard inverse quantizers; installed into the context by
   DCT_common_init() (arch-specific init code may replace them later) */
static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
#ifdef CONFIG_ENCODERS
/* encoder-only quantizers and helpers */
static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
#endif //CONFIG_ENCODERS

#ifdef HAVE_XVMC
/* XvMC hardware acceleration hooks, implemented elsewhere */
extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
extern void XVMC_field_end(MpegEncContext *s);
extern void XVMC_decode_mb(MpegEncContext *s);
#endif

/* function pointer so that arch-specific code can override edge drawing */
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
75
76 /* enable all paranoid tests for rounding, overflows, etc... */
77 //#define PARANOID
78
79 //#define DEBUG
80
81
/* for jpeg fast DCT */
#define CONST_BITS 14

/* post-scale factors of the AAN fast DCT; convert_matrix() folds these into
   the quantizer tables for the fdct_ifast path */
static const uint16_t aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};

/* h263-style chroma rounding table, indexed by a value in [0,15]
   (presumably the luma MV modulo 16 — referenced outside this chunk) */
static const uint8_t h263_chroma_roundtab[16] = {
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};

/* identity mapping: by default the chroma qscale equals the luma qscale */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};

#ifdef CONFIG_ENCODERS
/* default tables installed into the context by MPV_encode_defaults() */
static uint8_t default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
static uint8_t default_fcode_tab[MAX_MV*2+1];

/* -1 terminated pixel format list containing only YUV420P */
enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};

/**
 * Fills the integer quantizer multiplier tables for every qscale in
 * [qmin,qmax]. The layout depends on which fdct is in use, because
 * fdct_ifast (and faandct without FAAN_POSTSCALE) leaves an AAN scale
 * factor in its output that must be folded into the quantizer.
 * @param qmat   32-bit multipliers, 64 per qscale
 * @param qmat16 16-bit multiplier [0] and rounding bias [1] pairs (MMX path)
 * @param bias   quantizer rounding bias in QUANT_BIAS_SHIFT fixed point
 * @param intra  1 skips coefficient 0 (DC) in the overflow check
 */
static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                           const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
{
    int qscale;
    int shift=0; /* extra right shift needed so max_level*qmat fits in 32 bits */

    for(qscale=qmin; qscale<=qmax; qscale++){
        int i;
        if (dsp->fdct == ff_jpeg_fdct_islow
#ifdef FAAN_POSTSCALE
            || dsp->fdct == ff_faandct
#endif
            ) {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* The fdct output carries no extra scale factor here, so no
                   aanscales term is needed.
                   16 <= qscale * quant_matrix[i] <= 7905, hence the quotient
                   below stays well inside 32 bits. */

                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
                                (qscale * quant_matrix[j]));
            }
        } else if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
                                (aanscales[i] * qscale * quant_matrix[j]));
            }
        } else {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                   So 16           <= qscale * quant_matrix[i]             <= 7905
                   so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
                   so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
                */
                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
//                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);

                /* avoid multiplier 0 (would zero the coefficient) and 2^15
                   (problematic for the signed 16-bit MMX multiply) */
                if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
                qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
            }
        }

        /* find how far products must be shifted down so the largest possible
           level times the multiplier still fits into a 32-bit int */
        for(i=intra; i<64; i++){
            int64_t max= 8191;
            if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
                max= (8191LL*aanscales[i]) >> 14;
            }
            while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
                shift++;
            }
        }
    }
    if(shift){
        av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
    }
}
186
187 static inline void update_qscale(MpegEncContext *s){
188     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
189     s->qscale= av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
190
191     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
192 }
193 #endif //CONFIG_ENCODERS
194
195 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
196     int i;
197     int end;
198
199     st->scantable= src_scantable;
200
201     for(i=0; i<64; i++){
202         int j;
203         j = src_scantable[i];
204         st->permutated[i] = permutation[j];
205 #ifdef ARCH_POWERPC
206         st->inverse[j] = i;
207 #endif
208     }
209
210     end=-1;
211     for(i=0; i<64; i++){
212         int j;
213         j = st->permutated[i];
214         if(j>end) end=j;
215         st->raster_end[i]= end;
216     }
217 }
218
#ifdef CONFIG_ENCODERS
/**
 * Writes a custom quantization matrix to the bitstream: a '1' flag bit
 * followed by the 64 entries in zigzag order, or a single '0' flag bit
 * when no matrix is supplied.
 */
void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
    int i;

    if(!matrix){
        put_bits(pb, 1, 0);
        return;
    }

    put_bits(pb, 1, 1);
    for(i=0; i<64; i++)
        put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
}
#endif //CONFIG_ENCODERS
232
/**
 * Scans [p,end) for an MPEG-style start code (00 00 01 xx).
 * *state carries the last bytes seen across calls, so a start code that
 * straddles two buffers is still found.
 * @return pointer just past the start code (the xx byte), or end if none
 *         was found; *state then holds up to the last 4 bytes consumed.
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* first feed up to 3 bytes through the carried-over state, in case a
       start code spans the previous buffer boundary; tmp==0x100 means the
       previous state ended in 00 00 01 */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* fast scan: a start code needs the byte pattern 00 00 01, so depending
       on which of the last three bytes are nonzero we can step 1-3 bytes */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3;
        else if(p[-2]          ) p+= 2;
        else if(p[-3]|(p[-1]-1)) p++;
        else{
            p++;
            break;
        }
    }

    /* reload state from the last 4 bytes (clamped to the buffer end, so an
       overshooting scan still returns exactly end) */
    p= FFMIN(p, end)-4;
    *state=  be2me_32(unaligned32(p));

    return p+4;
}
262
/* init common dct for both encoder and decoder */
int DCT_common_init(MpegEncContext *s)
{
    /* install the portable C (un)quantizers; the arch-specific init calls
       below may replace any of them with optimized versions */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS

#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* keep the (possibly arch-optimized) plain quantizer reachable even when
       trellis quantization replaces dct_quantize below */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
323
324 static void copy_picture(Picture *dst, Picture *src){
325     *dst = *src;
326     dst->type= FF_BUFFER_TYPE_COPY;
327 }
328
#ifdef CONFIG_ENCODERS
/**
 * Copies the per-frame attributes (type, quality, numbers, pts, field
 * flags) from src to dst; when the encoder uses pre-pass motion estimation
 * (me_threshold set), also copies mb types, motion vectors and ref indices.
 */
static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
    int i;

    dst->pict_type              = src->pict_type;
    dst->quality                = src->quality;
    dst->coded_picture_number   = src->coded_picture_number;
    dst->display_picture_number = src->display_picture_number;
//    dst->reference              = src->reference;
    dst->pts                    = src->pts;
    dst->interlaced_frame       = src->interlaced_frame;
    dst->top_field_first        = src->top_field_first;

    if(s->avctx->me_threshold){
        /* the caller must have supplied the motion data; complain if not */
        if(!src->motion_val[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
        if(!src->mb_type)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
        if(!src->ref_index[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
        if(src->motion_subsample_log2 != dst->motion_subsample_log2)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
            src->motion_subsample_log2, dst->motion_subsample_log2);

        memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));

        /* one motion vector / ref index table per direction */
        for(i=0; i<2; i++){
            int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
            int height= ((16*s->mb_height)>>src->motion_subsample_log2);

            if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
                memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
            }
            if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
                memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
            }
        }
    }
}
#endif
369
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 * Besides the pixel planes, all per-picture side data (qscale/mb_type
 * tables, motion vectors, ...) is allocated on first use.
 * @return 0 on success, -1 on failure
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;

    if(shared){
        /* pixel data is owned by the caller; just tag the picture */
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* strides must stay constant once established */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* side data tables are allocated once and kept for later reuse */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* H.264: one motion vector per 4x4 block (subsample_log2==2) */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* other codecs: one motion vector per 8x8 block (subsample_log2==3) */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
454
/**
 * deallocates a picture
 * Releases the frame buffer through the codec context (unless the pixels
 * are application-shared) and frees all side data tables allocated by
 * alloc_picture().
 */
static void free_picture(MpegEncContext *s, Picture *pic){
    int i;

    /* shared pictures do not own their pixel buffers */
    if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    }

    av_freep(&pic->mb_var);
    av_freep(&pic->mc_mb_var);
    av_freep(&pic->mb_mean);
    av_freep(&pic->mbskip_table);
    av_freep(&pic->qscale_table);
    av_freep(&pic->mb_type_base);
    av_freep(&pic->dct_coeff);
    av_freep(&pic->pan_scan);
    pic->mb_type= NULL; /* pointed into mb_type_base, which is gone now */
    for(i=0; i<2; i++){
        av_freep(&pic->motion_val_base[i]);
        av_freep(&pic->ref_index[i]);
    }

    /* for shared pictures only drop our references to the caller's data */
    if(pic->type == FF_BUFFER_TYPE_SHARED){
        for(i=0; i<4; i++){
            pic->base[i]=
            pic->data[i]= NULL;
        }
        pic->type= 0;
    }
}
487
/**
 * Allocates the per-thread scratch buffers of a context: edge emulation
 * buffer, motion estimation scratchpads/maps and the DCT block arrays.
 * @param base currently unused
 * @return 0 on success, -1 on allocation failure (partially allocated
 *         buffers are freed later through MPV_common_end())
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the rd/b/obmc scratchpads all alias the single ME scratchpad allocation */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    /* pblocks are per-block aliases into the first blocks array */
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
517
518 static void free_duplicate_context(MpegEncContext *s){
519     if(s==NULL) return;
520
521     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
522     av_freep(&s->me.scratchpad);
523     s->rd_scratchpad=
524     s->b_scratchpad=
525     s->obmc_scratchpad= NULL;
526
527     av_freep(&s->dct_error_sum);
528     av_freep(&s->me.map);
529     av_freep(&s->me.score_map);
530     av_freep(&s->blocks);
531     s->block= NULL;
532 }
533
/**
 * Saves the thread-local fields of src into bak so they survive the
 * wholesale memcpy done in ff_update_duplicate_context().
 */
static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
#define COPY(a) bak->a= src->a
    /* per-thread scratch buffers */
    COPY(allocated_edge_emu_buffer);
    COPY(edge_emu_buffer);
    COPY(me.scratchpad);
    COPY(rd_scratchpad);
    COPY(b_scratchpad);
    COPY(obmc_scratchpad);
    COPY(me.map);
    COPY(me.score_map);
    COPY(blocks);
    COPY(block);
    /* per-thread slice range and bitstream/ME state */
    COPY(start_mb_y);
    COPY(end_mb_y);
    COPY(me.map_generation);
    COPY(pb);
    COPY(dct_error_sum);
    COPY(dct_count[0]);
    COPY(dct_count[1]);
#undef COPY
}
555
/**
 * Synchronizes a slice-thread context with the main context: copies the
 * whole context, then restores dst's own thread-local buffers and repoints
 * pblocks into dst's block array.
 */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    /* pblocks would otherwise still point into src's block array */
    for(i=0;i<12;i++){
        dst->pblocks[i] = (short *)(&dst->block[i]);
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
569
#ifdef CONFIG_ENCODERS
/**
 * Propagates the per-frame encoder state (picture type, f/b codes, qscale,
 * lambdas, GOP counters and header-derived flags) from the main context to
 * a thread context after the motion estimation pass.
 */
static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
#define COPY(a) dst->a= src->a
    COPY(pict_type);
    COPY(current_picture);
    COPY(f_code);
    COPY(b_code);
    COPY(qscale);
    COPY(lambda);
    COPY(lambda2);
    COPY(picture_in_gop_number);
    COPY(gop_picture_number);
    COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
    COPY(progressive_frame); //FIXME don't set in encode_header
    COPY(partitioned_frame); //FIXME don't set in encode_header
#undef COPY
}
#endif
588
/**
 * sets the given MpegEncContext to common defaults (same for encoding and decoding).
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 */
static void MPV_common_defaults(MpegEncContext *s){
    s->y_dc_scale_table=
    s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
    s->chroma_qscale_table= ff_default_chroma_qscale_table;
    /* assume progressive full-frame coding until headers say otherwise */
    s->progressive_frame= 1;
    s->progressive_sequence= 1;
    s->picture_structure= PICT_FRAME;

    /* reset all picture counters */
    s->coded_picture_number = 0;
    s->picture_number = 0;
    s->input_picture_number = 0;

    s->picture_in_gop_number = 0;

    /* default motion vector codes */
    s->f_code = 1;
    s->b_code = 1;
}
610
/**
 * sets the given MpegEncContext to defaults for decoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 * (currently identical to the common encoder/decoder defaults)
 */
void MPV_decode_defaults(MpegEncContext *s){
    MPV_common_defaults(s);
}
618
619 /**
620  * sets the given MpegEncContext to defaults for encoding.
621  * the changed fields will not depend upon the prior state of the MpegEncContext.
622  */
623
#ifdef CONFIG_ENCODERS
/* Encoder defaults on top of the common ones: mark the 32 central motion
 * vector entries (|mv| < 16) in the fcode table with 1 and install the
 * default penalty/fcode tables. */
static void MPV_encode_defaults(MpegEncContext *s){
    MPV_common_defaults(s);

    memset(default_fcode_tab + MAX_MV - 16, 1, 32);

    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab=     default_fcode_tab;
}
#endif //CONFIG_ENCODERS
636
637 /**
638  * init common structure for both encoder and decoder.
639  * this assumes that some variables like width/height are already set
640  */
641 int MPV_common_init(MpegEncContext *s)
642 {
643     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
644
645     s->mb_height = (s->height + 15) / 16;
646
647     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
648         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
649         return -1;
650     }
651
652     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
653         return -1;
654
655     dsputil_init(&s->dsp, s->avctx);
656     DCT_common_init(s);
657
658     s->flags= s->avctx->flags;
659     s->flags2= s->avctx->flags2;
660
661     s->mb_width  = (s->width  + 15) / 16;
662     s->mb_stride = s->mb_width + 1;
663     s->b8_stride = s->mb_width*2 + 1;
664     s->b4_stride = s->mb_width*4 + 1;
665     mb_array_size= s->mb_height * s->mb_stride;
666     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
667
668     /* set chroma shifts */
669     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
670                                                     &(s->chroma_y_shift) );
671
672     /* set default edge pos, will be overriden in decode_header if needed */
673     s->h_edge_pos= s->mb_width*16;
674     s->v_edge_pos= s->mb_height*16;
675
676     s->mb_num = s->mb_width * s->mb_height;
677
678     s->block_wrap[0]=
679     s->block_wrap[1]=
680     s->block_wrap[2]=
681     s->block_wrap[3]= s->b8_stride;
682     s->block_wrap[4]=
683     s->block_wrap[5]= s->mb_stride;
684
685     y_size = s->b8_stride * (2 * s->mb_height + 1);
686     c_size = s->mb_stride * (s->mb_height + 1);
687     yc_size = y_size + 2 * c_size;
688
689     /* convert fourcc to upper case */
690     s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
691                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
692                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
693                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
694
695     s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
696                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
697                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
698                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
699
700     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
701
702     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
703     for(y=0; y<s->mb_height; y++){
704         for(x=0; x<s->mb_width; x++){
705             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
706         }
707     }
708     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
709
710     if (s->encoding) {
711         /* Allocate MV tables */
712         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
713         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
714         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
715         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
716         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
717         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
718         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
719         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
720         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
721         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
722         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
723         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
724
725         if(s->msmpeg4_version){
726             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
727         }
728         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
729
730         /* Allocate MB type table */
731         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
732
733         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
734
735         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
736         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
737         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
738         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
739         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
740         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
741
742         if(s->avctx->noise_reduction){
743             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
744         }
745     }
746     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
747
748     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
749
750     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
751         /* interlaced direct mode decoding tables */
752             for(i=0; i<2; i++){
753                 int j, k;
754                 for(j=0; j<2; j++){
755                     for(k=0; k<2; k++){
756                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
757                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
758                     }
759                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
760                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
761                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
762                 }
763                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
764             }
765     }
766     if (s->out_format == FMT_H263) {
767         /* ac values */
768         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
769         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
770         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
771         s->ac_val[2] = s->ac_val[1] + c_size;
772
773         /* cbp values */
774         CHECKED_ALLOCZ(s->coded_block_base, y_size);
775         s->coded_block= s->coded_block_base + s->b8_stride + 1;
776
777         /* cbp, ac_pred, pred_dir */
778         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
779         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
780     }
781
782     if (s->h263_pred || s->h263_plus || !s->encoding) {
783         /* dc values */
784         //MN: we need these for error resilience of intra-frames
785         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
786         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
787         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
788         s->dc_val[2] = s->dc_val[1] + c_size;
789         for(i=0;i<yc_size;i++)
790             s->dc_val_base[i] = 1024;
791     }
792
793     /* which mb is a intra block */
794     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
795     memset(s->mbintra_table, 1, mb_array_size);
796
797     /* init macroblock skip table */
798     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
799     //Note the +1 is for a quicker mpeg4 slice_end detection
800     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
801
802     s->parse_context.state= -1;
803     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
804        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
805        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
806        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
807     }
808
809     s->context_initialized = 1;
810
811     s->thread_context[0]= s;
812     for(i=1; i<s->avctx->thread_count; i++){
813         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
814         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
815     }
816
817     for(i=0; i<s->avctx->thread_count; i++){
818         if(init_duplicate_context(s->thread_context[i], s) < 0)
819            goto fail;
820         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
821         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
822     }
823
824     return 0;
825  fail:
826     MPV_common_end(s);
827     return -1;
828 }
829
830 /* init common structure for both encoder and decoder */
831 void MPV_common_end(MpegEncContext *s)
832 {
833     int i, j, k;
834
835     for(i=0; i<s->avctx->thread_count; i++){
836         free_duplicate_context(s->thread_context[i]);
837     }
838     for(i=1; i<s->avctx->thread_count; i++){
839         av_freep(&s->thread_context[i]);
840     }
841
842     av_freep(&s->parse_context.buffer);
843     s->parse_context.buffer_size=0;
844
845     av_freep(&s->mb_type);
846     av_freep(&s->p_mv_table_base);
847     av_freep(&s->b_forw_mv_table_base);
848     av_freep(&s->b_back_mv_table_base);
849     av_freep(&s->b_bidir_forw_mv_table_base);
850     av_freep(&s->b_bidir_back_mv_table_base);
851     av_freep(&s->b_direct_mv_table_base);
852     s->p_mv_table= NULL;
853     s->b_forw_mv_table= NULL;
854     s->b_back_mv_table= NULL;
855     s->b_bidir_forw_mv_table= NULL;
856     s->b_bidir_back_mv_table= NULL;
857     s->b_direct_mv_table= NULL;
858     for(i=0; i<2; i++){
859         for(j=0; j<2; j++){
860             for(k=0; k<2; k++){
861                 av_freep(&s->b_field_mv_table_base[i][j][k]);
862                 s->b_field_mv_table[i][j][k]=NULL;
863             }
864             av_freep(&s->b_field_select_table[i][j]);
865             av_freep(&s->p_field_mv_table_base[i][j]);
866             s->p_field_mv_table[i][j]=NULL;
867         }
868         av_freep(&s->p_field_select_table[i]);
869     }
870
871     av_freep(&s->dc_val_base);
872     av_freep(&s->ac_val_base);
873     av_freep(&s->coded_block_base);
874     av_freep(&s->mbintra_table);
875     av_freep(&s->cbp_table);
876     av_freep(&s->pred_dir_table);
877
878     av_freep(&s->mbskip_table);
879     av_freep(&s->prev_pict_types);
880     av_freep(&s->bitstream_buffer);
881     s->allocated_bitstream_buffer_size=0;
882
883     av_freep(&s->avctx->stats_out);
884     av_freep(&s->ac_stats);
885     av_freep(&s->error_status_table);
886     av_freep(&s->mb_index2xy);
887     av_freep(&s->lambda_table);
888     av_freep(&s->q_intra_matrix);
889     av_freep(&s->q_inter_matrix);
890     av_freep(&s->q_intra_matrix16);
891     av_freep(&s->q_inter_matrix16);
892     av_freep(&s->input_picture);
893     av_freep(&s->reordered_input_picture);
894     av_freep(&s->dct_offset);
895
896     if(s->picture){
897         for(i=0; i<MAX_PICTURE_COUNT; i++){
898             free_picture(s, &s->picture[i]);
899         }
900     }
901     av_freep(&s->picture);
902     s->context_initialized = 0;
903     s->last_picture_ptr=
904     s->next_picture_ptr=
905     s->current_picture_ptr= NULL;
906     s->linesize= s->uvlinesize= 0;
907
908     for(i=0; i<3; i++)
909         av_freep(&s->visualization_buffer[i]);
910
911     avcodec_default_free_buffers(s->avctx);
912 }
913
914 #ifdef CONFIG_ENCODERS
915
916 /* init video encoder */
917 int MPV_encode_init(AVCodecContext *avctx)
918 {
919     MpegEncContext *s = avctx->priv_data;
920     int i;
921     int chroma_h_shift, chroma_v_shift;
922
923     MPV_encode_defaults(s);
924
925     switch (avctx->codec_id) {
926     case CODEC_ID_MPEG2VIDEO:
927         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
928             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
929             return -1;
930         }
931         break;
932     case CODEC_ID_LJPEG:
933     case CODEC_ID_MJPEG:
934         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
935            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
936             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
937             return -1;
938         }
939         break;
940     default:
941         if(avctx->pix_fmt != PIX_FMT_YUV420P){
942             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
943             return -1;
944         }
945     }
946
947     switch (avctx->pix_fmt) {
948     case PIX_FMT_YUVJ422P:
949     case PIX_FMT_YUV422P:
950         s->chroma_format = CHROMA_422;
951         break;
952     case PIX_FMT_YUVJ420P:
953     case PIX_FMT_YUV420P:
954     default:
955         s->chroma_format = CHROMA_420;
956         break;
957     }
958
959     s->bit_rate = avctx->bit_rate;
960     s->width = avctx->width;
961     s->height = avctx->height;
962     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
963         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
964         avctx->gop_size=600;
965     }
966     s->gop_size = avctx->gop_size;
967     s->avctx = avctx;
968     s->flags= avctx->flags;
969     s->flags2= avctx->flags2;
970     s->max_b_frames= avctx->max_b_frames;
971     s->codec_id= avctx->codec->id;
972     s->luma_elim_threshold  = avctx->luma_elim_threshold;
973     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
974     s->strict_std_compliance= avctx->strict_std_compliance;
975     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
976     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
977     s->mpeg_quant= avctx->mpeg_quant;
978     s->rtp_mode= !!avctx->rtp_payload_size;
979     s->intra_dc_precision= avctx->intra_dc_precision;
980     s->user_specified_pts = AV_NOPTS_VALUE;
981
982     if (s->gop_size <= 1) {
983         s->intra_only = 1;
984         s->gop_size = 12;
985     } else {
986         s->intra_only = 0;
987     }
988
989     s->me_method = avctx->me_method;
990
991     /* Fixed QSCALE */
992     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
993
994     s->adaptive_quant= (   s->avctx->lumi_masking
995                         || s->avctx->dark_masking
996                         || s->avctx->temporal_cplx_masking
997                         || s->avctx->spatial_cplx_masking
998                         || s->avctx->p_masking
999                         || s->avctx->border_masking
1000                         || (s->flags&CODEC_FLAG_QP_RD))
1001                        && !s->fixed_qscale;
1002
1003     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1004     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1005     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1006     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1007     s->q_scale_type= !!(s->flags2 & CODEC_FLAG2_NON_LINEAR_QUANT);
1008
1009     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1010         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
1011         return -1;
1012     }
1013
1014     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1015         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1016     }
1017
1018     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1019         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
1020         return -1;
1021     }
1022
1023     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1024         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1025         return -1;
1026     }
1027
1028     if(avctx->rc_buffer_size && avctx->bit_rate*av_q2d(avctx->time_base) > avctx->rc_buffer_size){
1029         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
1030         return -1;
1031     }
1032
1033     if(avctx->bit_rate*av_q2d(avctx->time_base) > avctx->bit_rate_tolerance){
1034         av_log(avctx, AV_LOG_ERROR, "bitrate tolerance too small for bitrate\n");
1035         return -1;
1036     }
1037
1038     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1039        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1040        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1041
1042         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1043     }
1044
1045     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1046        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1047         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1048         return -1;
1049     }
1050
1051     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1052         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1053         return -1;
1054     }
1055
1056     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1057         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1058         return -1;
1059     }
1060
1061     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1062         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1063         return -1;
1064     }
1065
1066     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1067         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1068         return -1;
1069     }
1070
1071     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1072         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1073         return -1;
1074     }
1075
1076     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1077        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1078         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1079         return -1;
1080     }
1081
1082     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1083         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1084         return -1;
1085     }
1086
1087     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1088         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1089         return -1;
1090     }
1091
1092     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1093         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1094         return -1;
1095     }
1096
1097     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1098         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet, set threshold to 1000000000\n");
1099         return -1;
1100     }
1101
1102     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1103         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1104         return -1;
1105     }
1106
1107     if(s->flags & CODEC_FLAG_LOW_DELAY){
1108         if (s->codec_id != CODEC_ID_MPEG2VIDEO && s->codec_id != CODEC_ID_MPEG1VIDEO){
1109             av_log(avctx, AV_LOG_ERROR, "low delay forcing is only available for mpeg1/2\n");
1110             return -1;
1111         }
1112         if (s->max_b_frames != 0){
1113             av_log(avctx, AV_LOG_ERROR, "b frames cannot be used with low delay\n");
1114             return -1;
1115         }
1116     }
1117
1118     if(s->q_scale_type == 1){
1119         if(s->codec_id != CODEC_ID_MPEG2VIDEO){
1120             av_log(avctx, AV_LOG_ERROR, "non linear quant is only available for mpeg2\n");
1121             return -1;
1122         }
1123         if(avctx->qmax > 12){
1124             av_log(avctx, AV_LOG_ERROR, "non linear quant only supports qmax <= 12 currently\n");
1125             return -1;
1126         }
1127     }
1128
1129     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1130        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1131        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1132         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1133         return -1;
1134     }
1135
1136     if(s->avctx->thread_count > 1)
1137         s->rtp_mode= 1;
1138
1139     if(!avctx->time_base.den || !avctx->time_base.num){
1140         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1141         return -1;
1142     }
1143
1144     i= (INT_MAX/2+128)>>8;
1145     if(avctx->me_threshold >= i){
1146         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1147         return -1;
1148     }
1149     if(avctx->mb_threshold >= i){
1150         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1151         return -1;
1152     }
1153
1154     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1155         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1156         avctx->b_frame_strategy = 0;
1157     }
1158
1159     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1160     if(i > 1){
1161         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1162         avctx->time_base.den /= i;
1163         avctx->time_base.num /= i;
1164 //        return -1;
1165     }
1166
1167     if(s->codec_id==CODEC_ID_MJPEG){
1168         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1169         s->inter_quant_bias= 0;
1170     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1171         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1172         s->inter_quant_bias= 0;
1173     }else{
1174         s->intra_quant_bias=0;
1175         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1176     }
1177
1178     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1179         s->intra_quant_bias= avctx->intra_quant_bias;
1180     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1181         s->inter_quant_bias= avctx->inter_quant_bias;
1182
1183     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1184
1185     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1186         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1187         return -1;
1188     }
1189     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1190
1191     switch(avctx->codec->id) {
1192     case CODEC_ID_MPEG1VIDEO:
1193         s->out_format = FMT_MPEG1;
1194         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1195         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1196         break;
1197     case CODEC_ID_MPEG2VIDEO:
1198         s->out_format = FMT_MPEG1;
1199         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
1200         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1201         s->rtp_mode= 1;
1202         break;
1203     case CODEC_ID_LJPEG:
1204     case CODEC_ID_MJPEG:
1205         s->out_format = FMT_MJPEG;
1206         s->intra_only = 1; /* force intra only for jpeg */
1207         s->mjpeg_vsample[0] = 2;
1208         s->mjpeg_vsample[1] = 2>>chroma_v_shift;
1209         s->mjpeg_vsample[2] = 2>>chroma_v_shift;
1210         s->mjpeg_hsample[0] = 2;
1211         s->mjpeg_hsample[1] = 2>>chroma_h_shift;
1212         s->mjpeg_hsample[2] = 2>>chroma_h_shift;
1213         if (!(ENABLE_MJPEG_ENCODER || ENABLE_LJPEG_ENCODER)
1214             || ff_mjpeg_encode_init(s) < 0)
1215             return -1;
1216         avctx->delay=0;
1217         s->low_delay=1;
1218         break;
1219     case CODEC_ID_H261:
1220         if (!ENABLE_H261_ENCODER)  return -1;
1221         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
1222             av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.261 codec.\nValid sizes are 176x144, 352x288\n", s->width, s->height);
1223             return -1;
1224         }
1225         s->out_format = FMT_H261;
1226         avctx->delay=0;
1227         s->low_delay=1;
1228         break;
1229     case CODEC_ID_H263:
1230         if (h263_get_picture_format(s->width, s->height) == 7) {
1231             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1232             return -1;
1233         }
1234         s->out_format = FMT_H263;
1235         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1236         avctx->delay=0;
1237         s->low_delay=1;
1238         break;
1239     case CODEC_ID_H263P:
1240         s->out_format = FMT_H263;
1241         s->h263_plus = 1;
1242         /* Fx */
1243         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1244         s->h263_aic= (avctx->flags & CODEC_FLAG_AC_PRED) ? 1:0;
1245         s->modified_quant= s->h263_aic;
1246         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1247         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1248         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1249         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1250         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1251
1252         /* /Fx */
1253         /* These are just to be sure */
1254         avctx->delay=0;
1255         s->low_delay=1;
1256         break;
1257     case CODEC_ID_FLV1:
1258         s->out_format = FMT_H263;
1259         s->h263_flv = 2; /* format = 1; 11-bit codes */
1260         s->unrestricted_mv = 1;
1261         s->rtp_mode=0; /* don't allow GOB */
1262         avctx->delay=0;
1263         s->low_delay=1;
1264         break;
1265     case CODEC_ID_RV10:
1266         s->out_format = FMT_H263;
1267         avctx->delay=0;
1268         s->low_delay=1;
1269         break;
1270     case CODEC_ID_RV20:
1271         s->out_format = FMT_H263;
1272         avctx->delay=0;
1273         s->low_delay=1;
1274         s->modified_quant=1;
1275         s->h263_aic=1;
1276         s->h263_plus=1;
1277         s->loop_filter=1;
1278         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1279         break;
1280     case CODEC_ID_MPEG4:
1281         s->out_format = FMT_H263;
1282         s->h263_pred = 1;
1283         s->unrestricted_mv = 1;
1284         s->low_delay= s->max_b_frames ? 0 : 1;
1285         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1286         break;
1287     case CODEC_ID_MSMPEG4V1:
1288         s->out_format = FMT_H263;
1289         s->h263_msmpeg4 = 1;
1290         s->h263_pred = 1;
1291         s->unrestricted_mv = 1;
1292         s->msmpeg4_version= 1;
1293         avctx->delay=0;
1294         s->low_delay=1;
1295         break;
1296     case CODEC_ID_MSMPEG4V2:
1297         s->out_format = FMT_H263;
1298         s->h263_msmpeg4 = 1;
1299         s->h263_pred = 1;
1300         s->unrestricted_mv = 1;
1301         s->msmpeg4_version= 2;
1302         avctx->delay=0;
1303         s->low_delay=1;
1304         break;
1305     case CODEC_ID_MSMPEG4V3:
1306         s->out_format = FMT_H263;
1307         s->h263_msmpeg4 = 1;
1308         s->h263_pred = 1;
1309         s->unrestricted_mv = 1;
1310         s->msmpeg4_version= 3;
1311         s->flipflop_rounding=1;
1312         avctx->delay=0;
1313         s->low_delay=1;
1314         break;
1315     case CODEC_ID_WMV1:
1316         s->out_format = FMT_H263;
1317         s->h263_msmpeg4 = 1;
1318         s->h263_pred = 1;
1319         s->unrestricted_mv = 1;
1320         s->msmpeg4_version= 4;
1321         s->flipflop_rounding=1;
1322         avctx->delay=0;
1323         s->low_delay=1;
1324         break;
1325     case CODEC_ID_WMV2:
1326         s->out_format = FMT_H263;
1327         s->h263_msmpeg4 = 1;
1328         s->h263_pred = 1;
1329         s->unrestricted_mv = 1;
1330         s->msmpeg4_version= 5;
1331         s->flipflop_rounding=1;
1332         avctx->delay=0;
1333         s->low_delay=1;
1334         break;
1335     default:
1336         return -1;
1337     }
1338
1339     avctx->has_b_frames= !s->low_delay;
1340
1341     s->encoding = 1;
1342
1343     /* init */
1344     if (MPV_common_init(s) < 0)
1345         return -1;
1346
1347     if(s->modified_quant)
1348         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1349     s->progressive_frame=
1350     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN));
1351     s->quant_precision=5;
1352
1353     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1354     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1355
1356     if (ENABLE_H261_ENCODER && s->out_format == FMT_H261)
1357         ff_h261_encode_init(s);
1358     if (s->out_format == FMT_H263)
1359         h263_encode_init(s);
1360     if (ENABLE_MSMPEG4_ENCODER && s->msmpeg4_version)
1361         ff_msmpeg4_encode_init(s);
1362     if (s->out_format == FMT_MPEG1)
1363         ff_mpeg1_encode_init(s);
1364
1365     /* init q matrix */
1366     for(i=0;i<64;i++) {
1367         int j= s->dsp.idct_permutation[i];
1368         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1369             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1370             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1371         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1372             s->intra_matrix[j] =
1373             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1374         }else
1375         { /* mpeg1/2 */
1376             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1377             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1378         }
1379         if(s->avctx->intra_matrix)
1380             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1381         if(s->avctx->inter_matrix)
1382             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1383     }
1384
1385     /* precompute matrix */
1386     /* for mjpeg, we do include qscale in the matrix */
1387     if (s->out_format != FMT_MJPEG) {
1388         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1389                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1390         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1391                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1392     }
1393
1394     if(ff_rate_control_init(s) < 0)
1395         return -1;
1396
1397     return 0;
1398 }
1399
1400 int MPV_encode_end(AVCodecContext *avctx)
1401 {
1402     MpegEncContext *s = avctx->priv_data;
1403
1404     ff_rate_control_uninit(s);
1405
1406     MPV_common_end(s);
1407     if ((ENABLE_MJPEG_ENCODER || ENABLE_LJPEG_ENCODER) && s->out_format == FMT_MJPEG)
1408         ff_mjpeg_encode_close(s);
1409
1410     av_freep(&avctx->extradata);
1411
1412     return 0;
1413 }
1414
1415 #endif //CONFIG_ENCODERS
1416
1417 void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
1418 {
1419     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1420     uint8_t index_run[MAX_RUN+1];
1421     int last, run, level, start, end, i;
1422
1423     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1424     if(static_store && rl->max_level[0])
1425         return;
1426
1427     /* compute max_level[], max_run[] and index_run[] */
1428     for(last=0;last<2;last++) {
1429         if (last == 0) {
1430             start = 0;
1431             end = rl->last;
1432         } else {
1433             start = rl->last;
1434             end = rl->n;
1435         }
1436
1437         memset(max_level, 0, MAX_RUN + 1);
1438         memset(max_run, 0, MAX_LEVEL + 1);
1439         memset(index_run, rl->n, MAX_RUN + 1);
1440         for(i=start;i<end;i++) {
1441             run = rl->table_run[i];
1442             level = rl->table_level[i];
1443             if (index_run[run] == rl->n)
1444                 index_run[run] = i;
1445             if (level > max_level[run])
1446                 max_level[run] = level;
1447             if (run > max_run[level])
1448                 max_run[level] = run;
1449         }
1450         if(static_store)
1451             rl->max_level[last] = static_store[last];
1452         else
1453             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1454         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1455         if(static_store)
1456             rl->max_run[last] = static_store[last] + MAX_RUN + 1;
1457         else
1458             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1459         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1460         if(static_store)
1461             rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
1462         else
1463             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1464         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1465     }
1466 }
1467
1468 /* draw the edges of width 'w' of an image of size width, height */
1469 //FIXME check that this is ok for mpeg4 interlaced
1470 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1471 {
1472     uint8_t *ptr, *last_line;
1473     int i;
1474
1475     last_line = buf + (height - 1) * wrap;
1476     for(i=0;i<w;i++) {
1477         /* top and bottom */
1478         memcpy(buf - (i + 1) * wrap, buf, width);
1479         memcpy(last_line + (i + 1) * wrap, last_line, width);
1480     }
1481     /* left and right */
1482     ptr = buf;
1483     for(i=0;i<height;i++) {
1484         memset(ptr - w, ptr[0], w);
1485         memset(ptr + width, ptr[width-1], w);
1486         ptr += wrap;
1487     }
1488     /* corners */
1489     for(i=0;i<w;i++) {
1490         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1491         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1492         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */
1493         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */
1494     }
1495 }
1496
1497 int ff_find_unused_picture(MpegEncContext *s, int shared){
1498     int i;
1499
1500     if(shared){
1501         for(i=0; i<MAX_PICTURE_COUNT; i++){
1502             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1503         }
1504     }else{
1505         for(i=0; i<MAX_PICTURE_COUNT; i++){
1506             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1507         }
1508         for(i=0; i<MAX_PICTURE_COUNT; i++){
1509             if(s->picture[i].data[0]==NULL) return i;
1510         }
1511     }
1512
1513     assert(0);
1514     return -1;
1515 }
1516
1517 static void update_noise_reduction(MpegEncContext *s){
1518     int intra, i;
1519
1520     for(intra=0; intra<2; intra++){
1521         if(s->dct_count[intra] > (1<<16)){
1522             for(i=0; i<64; i++){
1523                 s->dct_error_sum[intra][i] >>=1;
1524             }
1525             s->dct_count[intra] >>= 1;
1526         }
1527
1528         for(i=0; i<64; i++){
1529             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1530         }
1531     }
1532 }
1533
1534 /**
1535  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1536  */
1537 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1538 {
1539     int i;
1540     AVFrame *pic;
1541     s->mb_skipped = 0;
1542
1543     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1544
1545     /* mark&release old frames */
1546     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1547       if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1548         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1549
1550         /* release forgotten pictures */
1551         /* if(mpeg124/h263) */
1552         if(!s->encoding){
1553             for(i=0; i<MAX_PICTURE_COUNT; i++){
1554                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1555                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1556                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1557                 }
1558             }
1559         }
1560       }
1561     }
1562 alloc:
1563     if(!s->encoding){
1564         /* release non reference frames */
1565         for(i=0; i<MAX_PICTURE_COUNT; i++){
1566             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1567                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1568             }
1569         }
1570
1571         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1572             pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1573         else{
1574             i= ff_find_unused_picture(s, 0);
1575             pic= (AVFrame*)&s->picture[i];
1576         }
1577
1578         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1579                         && !s->dropable ? 3 : 0;
1580
1581         pic->coded_picture_number= s->coded_picture_number++;
1582
1583         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1584             return -1;
1585
1586         s->current_picture_ptr= (Picture*)pic;
1587         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1588         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1589     }
1590
1591     s->current_picture_ptr->pict_type= s->pict_type;
1592 //    if(s->flags && CODEC_FLAG_QSCALE)
1593   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1594     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1595
1596     copy_picture(&s->current_picture, s->current_picture_ptr);
1597
1598     if (s->pict_type != B_TYPE) {
1599         s->last_picture_ptr= s->next_picture_ptr;
1600         if(!s->dropable)
1601             s->next_picture_ptr= s->current_picture_ptr;
1602     }
1603 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1604         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1605         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1606         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1607         s->pict_type, s->dropable);*/
1608
1609     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1610     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1611
1612     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && !s->dropable){
1613         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1614         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1615         goto alloc;
1616     }
1617
1618     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1619
1620     if(s->picture_structure!=PICT_FRAME){
1621         int i;
1622         for(i=0; i<4; i++){
1623             if(s->picture_structure == PICT_BOTTOM_FIELD){
1624                  s->current_picture.data[i] += s->current_picture.linesize[i];
1625             }
1626             s->current_picture.linesize[i] *= 2;
1627             s->last_picture.linesize[i] *=2;
1628             s->next_picture.linesize[i] *=2;
1629         }
1630     }
1631
1632     s->hurry_up= s->avctx->hurry_up;
1633     s->error_resilience= avctx->error_resilience;
1634
1635     /* set dequantizer, we can't do it during init as it might change for mpeg4
1636        and we can't do it in the header decode as init isnt called for mpeg4 there yet */
1637     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1638         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1639         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1640     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1641         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1642         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1643     }else{
1644         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1645         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1646     }
1647
1648     if(s->dct_error_sum){
1649         assert(s->avctx->noise_reduction && s->encoding);
1650
1651         update_noise_reduction(s);
1652     }
1653
1654 #ifdef HAVE_XVMC
1655     if(s->avctx->xvmc_acceleration)
1656         return XVMC_field_start(s, avctx);
1657 #endif
1658     return 0;
1659 }
1660
/* generic function for encode/decode called after a frame has been coded/decoded */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            /* chroma planes use half the edge width and half the edge positions (4:2:0) */
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c(); // leave the MMX state clean for any following FPU code

    /* remember the type and lambda of this frame for later rate-control decisions */
    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1711
/**
 * Draws an anti-aliased line from (sx, sy) to (ex, ey), adding "color"
 * to every pixel it covers (16.16 fixed-point interpolation on the
 * minor axis).
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color value added to the covered pixels
 */
static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int adx, ady, step, frac;

    /* clamp both endpoints into the image */
    sx = sx < 0 ? 0 : (sx > w-1 ? w-1 : sx);
    sy = sy < 0 ? 0 : (sy > h-1 ? h-1 : sy);
    ex = ex < 0 ? 0 : (ex > w-1 ? w-1 : ex);
    ey = ey < 0 ? 0 : (ey > h-1 ? h-1 : ey);

    buf[sy*stride + sx] += color; /* always mark the start point */

    adx = ex - sx; if (adx < 0) adx = -adx;
    ady = ey - sy; if (ady < 0) ady = -ady;

    if (adx > ady) {
        /* x-major: walk along x, interpolate y */
        int x, y;
        if (sx > ex) {
            int t;
            t = sx; sx = ex; ex = t;
            t = sy; sy = ey; ey = t;
        }
        buf += sx + sy*stride;
        ex  -= sx;                       /* run length along x, >= 1 in this branch */
        step = ((ey - sy) << 16) / ex;
        for (x = 0; x <= ex; x++) {
            y    = (x*step) >> 16;
            frac = (x*step) & 0xFFFF;
            /* split the color between the two rows straddling the ideal line */
            buf[ y   *stride + x] += (color*(0x10000 - frac)) >> 16;
            buf[(y+1)*stride + x] += (color*           frac ) >> 16;
        }
    } else {
        /* y-major: walk along y, interpolate x */
        int x, y;
        if (sy > ey) {
            int t;
            t = sx; sx = ex; ex = t;
            t = sy; sy = ey; ey = t;
        }
        buf += sx + sy*stride;
        ey  -= sy;                       /* run length along y, may be 0 (single point) */
        step = ey ? ((ex - sx) << 16) / ey : 0;
        for (y = 0; y <= ey; y++) {
            x    = (y*step) >> 16;
            frac = (y*step) & 0xFFFF;
            buf[y*stride + x    ] += (color*(0x10000 - frac)) >> 16;
            buf[y*stride + x + 1] += (color*           frac ) >> 16;
        }
    }
}
1760
/**
 * Draws an arrow whose head sits at (sx, sy) and whose tail ends at (ex, ey).
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int vx, vy;

    /* keep the endpoints inside a loose 100-pixel border around the image */
    sx = av_clip(sx, -100, w+100);
    ex = av_clip(ex, -100, w+100);
    sy = av_clip(sy, -100, h+100);
    ey = av_clip(ey, -100, h+100);

    vx = ex - sx;
    vy = ey - sy;

    if (vx*vx + vy*vy > 3*3) {
        /* shaft is long enough for a visible head: add two barbs rotated
           +-45 degrees off the shaft direction, scaled to a fixed length */
        int bx  =  vx + vy;
        int by  = -vx + vy;
        int len = ff_sqrt((bx*bx + by*by) << 8);

        //FIXME subpixel accuracy
        bx = ROUNDED_DIV(bx*3 << 4, len);
        by = ROUNDED_DIV(by*3 << 4, len);

        draw_line(buf, sx, sy, sx + bx, sy + by, w, h, stride, color);
        draw_line(buf, sx, sy, sx - by, sy + bx, w, h, stride, color);
    }
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1793
/**
 * Prints debugging info for the given picture.
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        /* textual dump: one text row per macroblock row, each enabled debug
           flag contributing its own character column(s) per macroblock */
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9; // clamp to a single digit
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        /* draw into a private copy of the frame so the reference data stays intact */
        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                /* motion-vector visualization: draw one arrow per (sub)block,
                   for each requested prediction type/direction */
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        /* one arrow per 8x8 block, anchored at the block center */
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2; // field vectors cover half the vertical range

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      /* single 16x16 vector, anchored at the macroblock center */
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                /* QP visualization: fill the macroblock's 8x8 chroma blocks with
                   a level proportional to its qscale */
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                /* macroblock-type visualization: color-code the MB via its chroma planes */
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
/* pick a (u,v) chroma pair on a circle of radius r at angle theta (degrees) */
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128;
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    /* replicate the chosen byte across a whole 8-byte row write */
                    u*= 0x0101010101010101ULL;
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            /* mark sub-block boundaries only where the vectors actually differ */
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0; // consumed; reset for the next frame
            }
        }
    }
}
2054
2055 #ifdef CONFIG_ENCODERS
2056
/**
 * Returns the sum of absolute differences between a 16x16 block
 * and the constant value "ref".
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int row, col;
    int sum = 0;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++) {
            int d = src[col + row*stride] - ref;
            sum += d < 0 ? -d : d;
        }
    }

    return sum;
}
2069
2070 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2071     int x, y, w, h;
2072     int acc=0;
2073
2074     w= s->width &~15;
2075     h= s->height&~15;
2076
2077     for(y=0; y<h; y+=16){
2078         for(x=0; x<w; x+=16){
2079             int offset= x + y*stride;
2080             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2081             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2082             int sae = get_sae(src + offset, mean, stride);
2083
2084             acc+= sae + 500 < sad;
2085         }
2086     }
2087     return acc;
2088 }
2089
2090
/**
 * Buffers one user-supplied frame into s->input_picture[] for later encoding.
 * Validates (or, if absent, guesses) the frame's pts; either references the
 * caller's buffers directly or copies the planes into an internal picture.
 * @param pic_arg the frame to buffer, or NULL to just shift the queue (flush)
 * @return 0 on success, -1 on a non-monotone timestamp
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1; // whether we can reference the caller's buffers without copying

    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        if(pts != AV_NOPTS_VALUE){
            /* timestamps must be strictly increasing */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;

                if(time <= last){
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            /* no pts given: extrapolate from the previous one, or fall back
               to the display order number */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                s->user_specified_pts=
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
            }else{
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    /* direct mode requires the caller to preserve the input and to use our
       exact strides */
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        /* just alias the caller's planes */
        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
           && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
           && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
       // empty
        }else{
            /* copy every plane, row by row if the strides differ */
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i];

                if(!s->avctx->rc_buffer_size)
                    dst +=INPLACE_OFFSET;

                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifiying pic_arg
  }

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
2196
2197 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2198     int x, y, plane;
2199     int score=0;
2200     int64_t score64=0;
2201
2202     for(plane=0; plane<3; plane++){
2203         const int stride= p->linesize[plane];
2204         const int bw= plane ? 1 : 2;
2205         for(y=0; y<s->mb_height*bw; y++){
2206             for(x=0; x<s->mb_width*bw; x++){
2207                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2208                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2209
2210                 switch(s->avctx->frame_skip_exp){
2211                     case 0: score= FFMAX(score, v); break;
2212                     case 1: score+= FFABS(v);break;
2213                     case 2: score+= v*v;break;
2214                     case 3: score64+= FFABS(v*v*(int64_t)v);break;
2215                     case 4: score64+= v*v*(int64_t)(v*v);break;
2216                 }
2217             }
2218         }
2219     }
2220
2221     if(score) score64= score;
2222
2223     if(score64 < s->avctx->frame_skip_threshold)
2224         return 1;
2225     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2226         return 1;
2227     return 0;
2228 }
2229
2230 static int estimate_best_b_count(MpegEncContext *s){
2231     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2232     AVCodecContext *c= avcodec_alloc_context();
2233     AVFrame input[FF_MAX_B_FRAMES+2];
2234     const int scale= s->avctx->brd_scale;
2235     int i, j, out_size, p_lambda, b_lambda, lambda2;
2236     int outbuf_size= s->width * s->height; //FIXME
2237     uint8_t *outbuf= av_malloc(outbuf_size);
2238     int64_t best_rd= INT64_MAX;
2239     int best_b_count= -1;
2240
2241     assert(scale>=0 && scale <=3);
2242
2243 //    emms_c();
2244     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2245     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2246     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2247     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2248
2249     c->width = s->width >> scale;
2250     c->height= s->height>> scale;
2251     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2252     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2253     c->mb_decision= s->avctx->mb_decision;
2254     c->me_cmp= s->avctx->me_cmp;
2255     c->mb_cmp= s->avctx->mb_cmp;
2256     c->me_sub_cmp= s->avctx->me_sub_cmp;
2257     c->pix_fmt = PIX_FMT_YUV420P;
2258     c->time_base= s->avctx->time_base;
2259     c->max_b_frames= s->max_b_frames;
2260
2261     if (avcodec_open(c, codec) < 0)
2262         return -1;
2263
2264     for(i=0; i<s->max_b_frames+2; i++){
2265         int ysize= c->width*c->height;
2266         int csize= (c->width/2)*(c->height/2);
2267         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2268
2269         avcodec_get_frame_defaults(&input[i]);
2270         input[i].data[0]= av_malloc(ysize + 2*csize);
2271         input[i].data[1]= input[i].data[0] + ysize;
2272         input[i].data[2]= input[i].data[1] + csize;
2273         input[i].linesize[0]= c->width;
2274         input[i].linesize[1]=
2275         input[i].linesize[2]= c->width/2;
2276
2277         if(pre_input_ptr && (!i || s->input_picture[i-1])) {
2278             pre_input= *pre_input_ptr;
2279
2280             if(pre_input.type != FF_BUFFER_TYPE_SHARED && i) {
2281                 pre_input.data[0]+=INPLACE_OFFSET;
2282                 pre_input.data[1]+=INPLACE_OFFSET;
2283                 pre_input.data[2]+=INPLACE_OFFSET;
2284             }
2285
2286             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2287             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2288             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2289         }
2290     }
2291
2292     for(j=0; j<s->max_b_frames+1; j++){
2293         int64_t rd=0;
2294
2295         if(!s->input_picture[j])
2296             break;
2297
2298         c->error[0]= c->error[1]= c->error[2]= 0;
2299
2300         input[0].pict_type= I_TYPE;
2301         input[0].quality= 1 * FF_QP2LAMBDA;
2302         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2303 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2304
2305         for(i=0; i<s->max_b_frames+1; i++){
2306             int is_p= i % (j+1) == j || i==s->max_b_frames;
2307
2308             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2309             input[i+1].quality= is_p ? p_lambda : b_lambda;
2310             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2311             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2312         }
2313
2314         /* get the delayed frames */
2315         while(out_size){
2316             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2317             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2318         }
2319
2320         rd += c->error[0] + c->error[1] + c->error[2];
2321
2322         if(rd < best_rd){
2323             best_rd= rd;
2324             best_b_count= j;
2325         }
2326     }
2327
2328     av_freep(&outbuf);
2329     avcodec_close(c);
2330     av_freep(&c);
2331
2332     for(i=0; i<s->max_b_frames+2; i++){
2333         av_freep(&input[i].data[0]);
2334     }
2335
2336     return best_b_count;
2337 }
2338
/**
 * Picks the next picture to encode from the input queue and decides its
 * coding type (I/P/B).  Handles frame skipping, 2nd-pass picture type
 * assignment, B-frame run length selection and GOP boundaries.  On return,
 * s->reordered_input_picture[0] / s->new_picture / s->current_picture_ptr
 * describe the picture to be coded next (or s->new_picture is zeroed if
 * there is nothing to output yet).
 */
static void select_input_picture(MpegEncContext *s){
    int i;

    /* shift the reorder queue: slot 0 was consumed by the previous call */
    for(i=1; i<MAX_PICTURE_COUNT; i++)
        s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
    s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;

    /* set next picture type & ordering */
    if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
        /* no reference frame available yet, or intra-only coding: emit an I frame */
        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
            s->reordered_input_picture[0]= s->input_picture[0];
            s->reordered_input_picture[0]->pict_type= I_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
        }else{
            int b_frames;

            /* frame skipping: drop input_picture[0] if it is close enough to
             * the last coded reference according to skip_check() */
            if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
                if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
                //FIXME check that the gop check above is +-1 correct
//av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);

                    /* release the dropped picture's buffer */
                    if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
                        for(i=0; i<4; i++)
                            s->input_picture[0]->data[i]= NULL;
                        s->input_picture[0]->type= 0;
                    }else{
                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
                               || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

                        s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
                    }

                    emms_c();
                    /* account a zero-bit frame in the rate control buffer */
                    ff_vbv_update(s, 0);

                    goto no_output_pic;
                }
            }

            /* 2nd pass: take the picture types recorded by the first pass */
            if(s->flags&CODEC_FLAG_PASS2){
                for(i=0; i<s->max_b_frames+1; i++){
                    int pict_num= s->input_picture[0]->display_picture_number + i;

                    if(pict_num >= s->rc_context.num_entries)
                        break;
                    if(!s->input_picture[i]){
                        /* input ran out: force the previous stats entry to P */
                        s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
                        break;
                    }

                    s->input_picture[i]->pict_type=
                        s->rc_context.entry[pict_num].new_pict_type;
                }
            }

            /* choose the number of B frames to put before the next reference */
            if(s->avctx->b_frame_strategy==0){
                /* fixed: as many as are queued, up to max_b_frames */
                b_frames= s->max_b_frames;
                while(b_frames && !s->input_picture[b_frames]) b_frames--;
            }else if(s->avctx->b_frame_strategy==1){
                /* heuristic: score each candidate by its intra-MB count
                 * relative to the previous input (get_intra_count) and stop
                 * at the first frame scoring above the b_sensitivity limit */
                for(i=1; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
                        s->input_picture[i]->b_frame_score=
                            get_intra_count(s, s->input_picture[i  ]->data[0],
                                               s->input_picture[i-1]->data[0], s->linesize) + 1;
                    }
                }
                for(i=0; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
                }

                b_frames= FFMAX(0, i-1);

                /* reset scores */
                for(i=0; i<b_frames+1; i++){
                    s->input_picture[i]->b_frame_score=0;
                }
            }else if(s->avctx->b_frame_strategy==2){
                /* exhaustive: trial-encode with every B count and keep the best */
                b_frames= estimate_best_b_count(s);
            }else{
                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
                b_frames=0;
            }

            emms_c();
//static int b_count=0;
//b_count+= b_frames;
//av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);

            /* a pre-assigned non-B type (e.g. from pass 2) shortens the B run */
            for(i= b_frames - 1; i>=0; i--){
                int type= s->input_picture[i]->pict_type;
                if(type && type != B_TYPE)
                    b_frames= i;
            }
            if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
                av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
            }

            /* GOP boundary handling: either clamp the B run (strict GOP) or
             * promote the reference frame to I */
            if(s->picture_in_gop_number + b_frames >= s->gop_size){
              if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
                    b_frames= s->gop_size - s->picture_in_gop_number - 1;
              }else{
                if(s->flags & CODEC_FLAG_CLOSED_GOP)
                    b_frames=0;
                s->input_picture[b_frames]->pict_type= I_TYPE;
              }
            }

            /* a closed GOP must not start with B frames referencing across the I */
            if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
               && b_frames
               && s->input_picture[b_frames]->pict_type== I_TYPE)
                b_frames--;

            /* reference frame first in coding order, then its B frames */
            s->reordered_input_picture[0]= s->input_picture[b_frames];
            if(s->reordered_input_picture[0]->pict_type != I_TYPE)
                s->reordered_input_picture[0]->pict_type= P_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
            for(i=0; i<b_frames; i++){
                s->reordered_input_picture[i+1]= s->input_picture[i];
                s->reordered_input_picture[i+1]->pict_type= B_TYPE;
                s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
            }
        }
    }
no_output_pic:
    if(s->reordered_input_picture[0]){
        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;

        copy_picture(&s->new_picture, s->reordered_input_picture[0]);

        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size){
            // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable

            int i= ff_find_unused_picture(s, 0);
            Picture *pic= &s->picture[i];

            pic->reference              = s->reordered_input_picture[0]->reference;
            alloc_picture(s, pic, 0);

            /* mark us unused / free shared pic */
            if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_INTERNAL)
                s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
            for(i=0; i<4; i++)
                s->reordered_input_picture[0]->data[i]= NULL;
            s->reordered_input_picture[0]->type= 0;

            copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);

            s->current_picture_ptr= pic;
        }else{
            // input is not a shared pix -> reuse buffer for current_pix

            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
                   || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

            s->current_picture_ptr= s->reordered_input_picture[0];
            for(i=0; i<4; i++){
                s->new_picture.data[i]+= INPLACE_OFFSET;
            }
        }
        copy_picture(&s->current_picture, s->current_picture_ptr);

        s->picture_number= s->new_picture.display_picture_number;
//printf("dpn:%d\n", s->picture_number);
    }else{
       /* nothing to encode this call */
       memset(&s->new_picture, 0, sizeof(Picture));
    }
}
2506
/**
 * Encodes one video frame.
 * Loads the input picture, selects the picture to code (possibly reordered
 * for B frames), encodes it, then applies rate-control corrections:
 * re-encoding with a higher lambda on VBV overflow, byte stuffing on
 * underflow, and patching vbv_delay for CBR MPEG-1/2.
 * @param avctx    codec context (priv_data is the MpegEncContext)
 * @param buf      output buffer for the coded bitstream
 * @param buf_size size of buf in bytes
 * @param data     input AVFrame (may be NULL to flush delayed frames)
 * @return number of bytes written to buf, or -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    /* split the output buffer between the slice threads, proportionally
     * to the number of macroblock rows each one encodes */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;

    select_input_picture(s);

    /* output? */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);
vbv_retry:
        if (encode_picture(s, s->picture_number) < 0)
            return -1;

        /* export per-frame statistics */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (ENABLE_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
            ff_mjpeg_encode_picture_trailer(s);

        /* VBV overflow check: if the frame is too large, raise lambda
         * (globally and per-MB) and re-encode it */
        if(avctx->rc_buffer_size){
            RateControlContext *rcc= &s->rc_context;
            int max_size= rcc->buffer_index/3;

            if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
                s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
                if(s->adaptive_quant){
                    int i;
                    for(i=0; i<s->mb_height*s->mb_stride; i++)
                        s->lambda_table[i]= FFMAX(s->lambda_table[i]+1, s->lambda_table[i]*(s->qscale+1) / s->qscale);
                }
                s->mb_skipped = 0;        //done in MPV_frame_start()
                if(s->pict_type==P_TYPE){ //done in encode_picture() so we must undo it
                    if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
                        s->no_rounding ^= 1;
                }
                if(s->pict_type!=B_TYPE){
                    s->time_base= s->last_time_base;
                    s->last_non_b_time= s->time - s->pp_time;
                }
//                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
                /* rewind all thread bitstream writers before retrying */
                for(i=0; i<avctx->thread_count; i++){
                    PutBitContext *pb= &s->thread_context[i]->pb;
                    init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
                }
                goto vbv_retry;
            }

            assert(s->avctx->rc_max_rate);
        }

        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        for(i=0; i<4; i++){
            s->current_picture_ptr->error[i]= s->current_picture.error[i];
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* VBV underflow: pad the frame with codec-specific stuffing bytes */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                /* MPEG-1/2: zero bytes are legal stuffing */
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4: stuffing_start_code followed by 0xFF bytes */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);

            /* 90 kHz units, patched into the already-written picture header
             * via s->vbv_delay_ptr */
            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* no picture to output (e.g. B frames still being buffered) */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    assert((s->frame_bits&7)==0);

    return s->frame_bits/8;
}
2655
2656 #endif //CONFIG_ENCODERS
2657
/**
 * Global motion compensation for one macroblock, fast "GMC1" path:
 * a single translational vector (s->sprite_offset) applied to all three
 * components through s->dsp.gmc1() (one-warp-point case).
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0; // set when chroma went through the edge emulation buffer

    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    /* integer part of the luma source position */
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    /* rescale so the fractional part is in the 1/16-pel units (&15 below)
       expected by dsp.gmc1 */
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    /* clip to the picture; when clipped onto the right/bottom border the
       fractional part is dropped so no out-of-picture samples are read */
    src_x = av_clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;
    src_y = av_clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* 17x17 source area needed for 16x16 interpolation */
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }

    if((motion_x|motion_y)&7){
        /* sub-half-pel fraction present: bilinear via gmc1, in two 8x16 halves */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        int dxy;

        /* only full/half-pel components: a plain (no_rnd) pixel copy suffices */
        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if(s->flags&CODEC_FLAG_GRAY) return; // luma-only decoding requested

    /* same procedure for chroma, at half resolution */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = av_clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = av_clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    ptr = ref_picture[2] + offset;
    if(emu){
        /* Cr sits at the same position as Cb, so reuse the Cb decision */
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    return;
}
2743
2744 static inline void gmc_motion(MpegEncContext *s,
2745                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2746                                uint8_t **ref_picture)
2747 {
2748     uint8_t *ptr;
2749     int linesize, uvlinesize;
2750     const int a= s->sprite_warping_accuracy;
2751     int ox, oy;
2752
2753     linesize = s->linesize;
2754     uvlinesize = s->uvlinesize;
2755
2756     ptr = ref_picture[0];
2757
2758     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2759     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2760
2761     s->dsp.gmc(dest_y, ptr, linesize, 16,
2762            ox,
2763            oy,
2764            s->sprite_delta[0][0], s->sprite_delta[0][1],
2765            s->sprite_delta[1][0], s->sprite_delta[1][1],
2766            a+1, (1<<(2*a+1)) - s->no_rounding,
2767            s->h_edge_pos, s->v_edge_pos);
2768     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2769            ox + s->sprite_delta[0][0]*8,
2770            oy + s->sprite_delta[1][0]*8,
2771            s->sprite_delta[0][0], s->sprite_delta[0][1],
2772            s->sprite_delta[1][0], s->sprite_delta[1][1],
2773            a+1, (1<<(2*a+1)) - s->no_rounding,
2774            s->h_edge_pos, s->v_edge_pos);
2775
2776     if(s->flags&CODEC_FLAG_GRAY) return;
2777
2778     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2779     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2780
2781     ptr = ref_picture[1];
2782     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2783            ox,
2784            oy,
2785            s->sprite_delta[0][0], s->sprite_delta[0][1],
2786            s->sprite_delta[1][0], s->sprite_delta[1][1],
2787            a+1, (1<<(2*a+1)) - s->no_rounding,
2788            s->h_edge_pos>>1, s->v_edge_pos>>1);
2789
2790     ptr = ref_picture[2];
2791     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2792            ox,
2793            oy,
2794            s->sprite_delta[0][0], s->sprite_delta[0][1],
2795            s->sprite_delta[1][0], s->sprite_delta[1][1],
2796            a+1, (1<<(2*a+1)) - s->no_rounding,
2797            s->h_edge_pos>>1, s->v_edge_pos>>1);
2798 }
2799
2800 /**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2802  * @param buf destination buffer
2803  * @param src source buffer
2804  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2805  * @param block_w width of block
2806  * @param block_h height of block
2807  * @param src_x x coordinate of the top left sample of the block in the source buffer
2808  * @param src_y y coordinate of the top left sample of the block in the source buffer
2809  * @param w width of the source buffer
2810  * @param h height of the source buffer
2811  */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* Shift (src, src_x, src_y) so the block overlaps the source picture
     * by at least one row and one column. */
    if(src_y >= h){
        src   += (h - 1 - src_y) * linesize;
        src_y  = h - 1;
    }else if(src_y <= -block_h){
        src   += (1 - block_h - src_y) * linesize;
        src_y  = 1 - block_h;
    }
    if(src_x >= w){
        src   += w - 1 - src_x;
        src_x  = w - 1;
    }else if(src_x <= -block_w){
        src   += 1 - block_w - src_x;
        src_x  = 1 - block_w;
    }

    /* region of the block that is actually covered by the source */
    start_y = -src_y > 0 ? -src_y : 0;
    start_x = -src_x > 0 ? -src_x : 0;
    end_y   = h - src_y < block_h ? h - src_y : block_h;
    end_x   = w - src_x < block_w ? w - src_x : block_w;

    /* copy the available samples */
    for(y = start_y; y < end_y; y++)
        for(x = start_x; x < end_x; x++)
            buf[y * linesize + x] = src[y * linesize + x];

    /* extend each copied row to the left and right borders */
    for(y = start_y; y < end_y; y++){
        for(x = 0; x < start_x; x++)
            buf[y * linesize + x] = buf[y * linesize + start_x];
        for(x = end_x; x < block_w; x++)
            buf[y * linesize + x] = buf[y * linesize + end_x - 1];
    }

    /* replicate the first/last completed row upwards/downwards */
    for(y = 0; y < start_y; y++)
        for(x = 0; x < block_w; x++)
            buf[y * linesize + x] = buf[start_y * linesize + x];
    for(y = end_y; y < block_h; y++)
        for(x = 0; x < block_w; x++)
            buf[y * linesize + x] = buf[(end_y - 1) * linesize + x];
}
2870
2871 static inline int hpel_motion(MpegEncContext *s,
2872                                   uint8_t *dest, uint8_t *src,
2873                                   int field_based, int field_select,
2874                                   int src_x, int src_y,
2875                                   int width, int height, int stride,
2876                                   int h_edge_pos, int v_edge_pos,
2877                                   int w, int h, op_pixels_func *pix_op,
2878                                   int motion_x, int motion_y)
2879 {
2880     int dxy;
2881     int emu=0;
2882
2883     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2884     src_x += motion_x >> 1;
2885     src_y += motion_y >> 1;
2886
2887     /* WARNING: do no forget half pels */
2888     src_x = av_clip(src_x, -16, width); //FIXME unneeded for emu?
2889     if (src_x == width)
2890         dxy &= ~1;
2891     src_y = av_clip(src_y, -16, height);
2892     if (src_y == height)
2893         dxy &= ~2;
2894     src += src_y * stride + src_x;
2895
2896     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2897         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2898            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2899             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2900                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2901             src= s->edge_emu_buffer;
2902             emu=1;
2903         }
2904     }
2905     if(field_select)
2906         src += s->linesize;
2907     pix_op[dxy](dest, src, stride, h);
2908     return emu;
2909 }
2910
2911 static inline int hpel_motion_lowres(MpegEncContext *s,
2912                                   uint8_t *dest, uint8_t *src,
2913                                   int field_based, int field_select,
2914                                   int src_x, int src_y,
2915                                   int width, int height, int stride,
2916                                   int h_edge_pos, int v_edge_pos,
2917                                   int w, int h, h264_chroma_mc_func *pix_op,
2918                                   int motion_x, int motion_y)
2919 {
2920     const int lowres= s->avctx->lowres;
2921     const int s_mask= (2<<lowres)-1;
2922     int emu=0;
2923     int sx, sy;
2924
2925     if(s->quarter_sample){
2926         motion_x/=2;
2927         motion_y/=2;
2928     }
2929
2930     sx= motion_x & s_mask;
2931     sy= motion_y & s_mask;
2932     src_x += motion_x >> (lowres+1);
2933     src_y += motion_y >> (lowres+1);
2934
2935     src += src_y * stride + src_x;
2936
2937     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2938        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2939         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2940                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2941         src= s->edge_emu_buffer;
2942         emu=1;
2943     }
2944
2945     sx <<= 2 - lowres;
2946     sy <<= 2 - lowres;
2947     if(field_select)
2948         src += s->linesize;
2949     pix_op[lowres](dest, src, stride, h, sx, sy);
2950     return emu;
2951 }
2952
/**
 * Applies one mpeg motion vector to the three components (Y, Cb, Cr).
 * @param field_based   1 for field (interlaced) prediction; line strides are doubled
 * @param bottom_field  write to the bottom field of the destination
 * @param field_select  read from the second field of the reference
 * @param pix_op        put/avg pixel function table, indexed [chroma_x_shift][dxy]
 * @param h             height of the predicted area in lines
 */
static av_always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;

#if 0
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* half-pel flags (bit 0: x, bit 1: y) and integer luma source position */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    /* derive the chroma position/interpolation from the luma vector,
     * depending on the output format's chroma subsampling rules */
    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* buggy-encoder workaround: chroma rounded like H.263 full-pel */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* 4:2:0 */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* source area (partly) outside the decoded picture: emulate the edges */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                /* MPEG-1/2 does not allow MVs pointing outside the picture */
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
    if((ENABLE_H261_ENCODER || ENABLE_H261_DECODER) && s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
}
3067
/**
 * Apply one MPEG motion vector to the three components, operating on the
 * reduced-resolution ("lowres") planes.  Counterpart of mpeg_motion(): every
 * position and block size is scaled down by s->avctx->lowres, and the h264
 * chroma MC functions are used because they take the sub-pel fractions
 * (sx/sy, uvsx/uvsy) as explicit arguments.
 *
 * @param field_based  1 when predicting a single field (halves vertical sizes)
 * @param bottom_field 1 when the destination is the bottom field
 * @param field_select which field of the reference to read from
 * @param ref_picture  array[3] of pointers to the Y/Cb/Cr reference planes
 * @param pix_op       chroma-style MC functions indexed by log2(block size)
 * @param h            height of the predicted area in lowres luma lines
 */
static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;        // 8x8 luma block size at this lowres level
    const int s_mask= (2<<lowres)-1;     // mask isolating the sub-pel fraction bits
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split the MV into integer position (src_x/src_y) and fraction (sx/sy) */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    /* derive the chroma position: the rounding rule differs per out_format */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the read would cross the padded picture edge, build a replicated
       copy in edge_emu_buffer and read from there instead */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* scale the fractions to the 1/8-pel range the chroma MC functions expect */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
3164
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Blend five 8x8 predictions into one 8x8 block for overlapped block motion
 * compensation.  Each output pixel is a fixed weighted sum of the co-located
 * pixels of the mid prediction and the top/left/right/bottom neighbour
 * predictions; the five weights of every position sum to 8 and the result is
 * rounded ((... + 4) >> 3).  The weight layout is hand-unrolled below.
 *
 * @param dst    destination 8x8 block (stride `stride`)
 * @param src    the five 8x8 source blocks: [0]=mid [1]=top [2]=left
 *               [3]=right [4]=bottom, all with stride `stride`
 * @param stride line size of dst and of all src blocks
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
/* one output pixel: weights t,l,m,r,b applied to the five predictions */
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
/* a 2x2 group (two columns, two rows) sharing the same weights */
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    x=0;
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    x+= stride;
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    x+= stride;
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    x+= 2*stride;
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    x+= 2*stride;
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    x+= stride;
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
}
3216
3217 /* obmc for 1 8x8 luma block */
3218 static inline void obmc_motion(MpegEncContext *s,
3219                                uint8_t *dest, uint8_t *src,
3220                                int src_x, int src_y,
3221                                op_pixels_func *pix_op,
3222                                int16_t mv[5][2]/* mid top left right bottom*/)
3223 #define MID    0
3224 {
3225     int i;
3226     uint8_t *ptr[5];
3227
3228     assert(s->quarter_sample==0);
3229
3230     for(i=0; i<5; i++){
3231         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3232             ptr[i]= ptr[MID];
3233         }else{
3234             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3235             hpel_motion(s, ptr[i], src, 0, 0,
3236                         src_x, src_y,
3237                         s->width, s->height, s->linesize,
3238                         s->h_edge_pos, s->v_edge_pos,
3239                         8, 8, pix_op,
3240                         mv[i][0], mv[i][1]);
3241         }
3242     }
3243
3244     put_obmc(dest, ptr, s->linesize);
3245 }
3246
/**
 * Quarter-pel motion compensation of one macroblock (luma) with half-pel
 * chroma, for frame or field prediction.
 *
 * @param field_based  1 when predicting a single field (halves vertical sizes)
 * @param bottom_field 1 when the destination is the bottom field
 * @param field_select which field of the reference to read from
 * @param ref_picture  array[3] of pointers to the Y/Cb/Cr reference planes
 * @param pix_op       halfpel MC functions (used for chroma)
 * @param qpix_op      qpel MC functions (used for luma)
 * @param h            height of the predicted area in luma lines
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    /* dxy encodes the 4x4 sub-pel phase selecting one of the 16 qpel functions */
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    /* halve the luma MV for chroma; encoder-bug workarounds change the rounding */
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    /* round the half-pel chroma MV down to half-pel precision, keeping oddness */
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* replicate edge pixels into edge_emu_buffer when the MV points outside */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3333
3334 inline int ff_h263_round_chroma(int x){
3335     if (x >= 0)
3336         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3337     else {
3338         x = -x;
3339         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3340     }
3341 }
3342
3343 /**
3344  * h263 chorma 4mv motion compensation.
3345  */
3346 static inline void chroma_4mv_motion(MpegEncContext *s,
3347                                      uint8_t *dest_cb, uint8_t *dest_cr,
3348                                      uint8_t **ref_picture,
3349                                      op_pixels_func *pix_op,
3350                                      int mx, int my){
3351     int dxy, emu=0, src_x, src_y, offset;
3352     uint8_t *ptr;
3353
3354     /* In case of 8X8, we construct a single chroma motion vector
3355        with a special rounding */
3356     mx= ff_h263_round_chroma(mx);
3357     my= ff_h263_round_chroma(my);
3358
3359     dxy = ((my & 1) << 1) | (mx & 1);
3360     mx >>= 1;
3361     my >>= 1;
3362
3363     src_x = s->mb_x * 8 + mx;
3364     src_y = s->mb_y * 8 + my;
3365     src_x = av_clip(src_x, -8, s->width/2);
3366     if (src_x == s->width/2)
3367         dxy &= ~1;
3368     src_y = av_clip(src_y, -8, s->height/2);
3369     if (src_y == s->height/2)
3370         dxy &= ~2;
3371
3372     offset = (src_y * (s->uvlinesize)) + src_x;
3373     ptr = ref_picture[1] + offset;
3374     if(s->flags&CODEC_FLAG_EMU_EDGE){
3375         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3376            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3377             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3378             ptr= s->edge_emu_buffer;
3379             emu=1;
3380         }
3381     }
3382     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3383
3384     ptr = ref_picture[2] + offset;
3385     if(emu){
3386         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3387         ptr= s->edge_emu_buffer;
3388     }
3389     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3390 }
3391
/**
 * Lowres variant of chroma_4mv_motion(): builds one chroma MV from the sum
 * of the four luma MVs (mx, my) with the H.263 rounding, then predicts both
 * chroma blocks using the h264 chroma MC functions, which take the sub-pel
 * fraction (sx, sy) as explicit arguments.
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;        // chroma block size at this lowres level
    const int s_mask= (2<<lowres)-1;     // mask isolating the sub-pel fraction bits
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    /* split into integer position (src_x/src_y) and fraction (sx/sy) */
    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    /* scale the fraction to the 1/8-pel range the chroma MC functions expect */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    /* Cr uses the same offset; redo edge emulation if Cb needed it */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
3441
3442 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3443     /* fetch pixels for estimated mv 4 macroblocks ahead
3444      * optimized for 64byte cache lines */
3445     const int shift = s->quarter_sample ? 2 : 1;
3446     const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
3447     const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
3448     int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3449     s->dsp.prefetch(pix[0]+off, s->linesize, 4);
3450     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3451     s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
3452 }
3453
3454 /**
3455  * motion compensation of a single macroblock
3456  * @param s context
3457  * @param dest_y luma destination pointer
3458  * @param dest_cb chroma cb/u destination pointer
3459  * @param dest_cr chroma cr/v destination pointer
3460  * @param dir direction (0->forward, 1->backward)
3461  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3462  * @param pic_op halfpel motion compensation function (average or put normally)
3463  * @param pic_op qpel motion compensation function (average or put normally)
3464  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3465  */
3466 static inline void MPV_motion(MpegEncContext *s,
3467                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3468                               int dir, uint8_t **ref_picture,
3469                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3470 {
3471     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3472     int mb_x, mb_y, i;
3473     uint8_t *ptr, *dest;
3474
3475     mb_x = s->mb_x;
3476     mb_y = s->mb_y;
3477
3478     prefetch_motion(s, ref_picture, dir);
3479
3480     if(s->obmc && s->pict_type != B_TYPE){
3481         int16_t mv_cache[4][4][2];
3482         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3483         const int mot_stride= s->b8_stride;
3484         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3485
3486         assert(!s->mb_skipped);
3487
3488         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3489         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3490         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3491
3492         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3493             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3494         }else{
3495             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3496         }
3497
3498         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3499             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3500             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3501         }else{
3502             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3503             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3504         }
3505
3506         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3507             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3508             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3509         }else{
3510             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3511             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3512         }
3513
3514         mx = 0;
3515         my = 0;
3516         for(i=0;i<4;i++) {
3517             const int x= (i&1)+1;
3518             const int y= (i>>1)+1;
3519             int16_t mv[5][2]= {
3520                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3521                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3522                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3523                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3524                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3525             //FIXME cleanup
3526             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3527                         ref_picture[0],
3528                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3529                         pix_op[1],
3530                         mv);
3531
3532             mx += mv[0][0];
3533             my += mv[0][1];
3534         }
3535         if(!(s->flags&CODEC_FLAG_GRAY))
3536             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3537
3538         return;
3539     }
3540
3541     switch(s->mv_type) {
3542     case MV_TYPE_16X16:
3543         if(s->mcsel){
3544             if(s->real_sprite_warping_points==1){
3545                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3546                             ref_picture);
3547             }else{
3548                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3549                             ref_picture);
3550             }
3551         }else if(s->quarter_sample){
3552             qpel_motion(s, dest_y, dest_cb, dest_cr,
3553                         0, 0, 0,
3554                         ref_picture, pix_op, qpix_op,
3555                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3556         }else if(ENABLE_WMV2 && s->mspel){
3557             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3558                         ref_picture, pix_op,
3559                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3560         }else
3561         {
3562             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3563                         0, 0, 0,
3564                         ref_picture, pix_op,
3565                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3566         }
3567         break;
3568     case MV_TYPE_8X8:
3569         mx = 0;
3570         my = 0;
3571         if(s->quarter_sample){
3572             for(i=0;i<4;i++) {
3573                 motion_x = s->mv[dir][i][0];
3574                 motion_y = s->mv[dir][i][1];
3575
3576                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3577                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3578                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3579
3580                 /* WARNING: do no forget half pels */
3581                 src_x = av_clip(src_x, -16, s->width);
3582                 if (src_x == s->width)
3583                     dxy &= ~3;
3584                 src_y = av_clip(src_y, -16, s->height);
3585                 if (src_y == s->height)
3586                     dxy &= ~12;
3587
3588                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3589                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3590                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3591                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3592                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3593                         ptr= s->edge_emu_buffer;
3594                     }
3595                 }
3596                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3597                 qpix_op[1][dxy](dest, ptr, s->linesize);
3598
3599                 mx += s->mv[dir][i][0]/2;
3600                 my += s->mv[dir][i][1]/2;
3601             }
3602         }else{
3603             for(i=0;i<4;i++) {
3604                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3605                             ref_picture[0], 0, 0,
3606                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3607                             s->width, s->height, s->linesize,
3608                             s->h_edge_pos, s->v_edge_pos,
3609                             8, 8, pix_op[1],
3610                             s->mv[dir][i][0], s->mv[dir][i][1]);
3611
3612                 mx += s->mv[dir][i][0];
3613                 my += s->mv[dir][i][1];
3614             }
3615         }
3616
3617         if(!(s->flags&CODEC_FLAG_GRAY))
3618             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3619         break;
3620     case MV_TYPE_FIELD:
3621         if (s->picture_structure == PICT_FRAME) {
3622             if(s->quarter_sample){
3623                 for(i=0; i<2; i++){
3624                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3625                                 1, i, s->field_select[dir][i],
3626                                 ref_picture, pix_op, qpix_op,
3627                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3628                 }
3629             }else{
3630                 /* top field */
3631                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3632                             1, 0, s->field_select[dir][0],
3633                             ref_picture, pix_op,
3634                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3635                 /* bottom field */
3636                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3637                             1, 1, s->field_select[dir][1],
3638                             ref_picture, pix_op,
3639                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3640             }
3641         } else {
3642             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3643                 ref_picture= s->current_picture_ptr->data;
3644             }
3645
3646             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3647                         0, 0, s->field_select[dir][0],
3648                         ref_picture, pix_op,
3649                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3650         }
3651         break;
3652     case MV_TYPE_16X8:
3653         for(i=0; i<2; i++){
3654             uint8_t ** ref2picture;
3655
3656             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3657                 ref2picture= ref_picture;
3658             }else{
3659                 ref2picture= s->current_picture_ptr->data;
3660             }
3661
3662             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3663                         0, 0, s->field_select[dir][i],
3664                         ref2picture, pix_op,
3665                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3666
3667             dest_y += 16*s->linesize;
3668             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3669             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3670         }
3671         break;
3672     case MV_TYPE_DMV:
3673         if(s->picture_structure == PICT_FRAME){
3674             for(i=0; i<2; i++){
3675                 int j;
3676                 for(j=0; j<2; j++){
3677                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3678                                 1, j, j^i,
3679                                 ref_picture, pix_op,
3680                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3681                 }
3682                 pix_op = s->dsp.avg_pixels_tab;
3683             }
3684         }else{
3685             for(i=0; i<2; i++){
3686                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3687                             0, 0, s->picture_structure != i+1,
3688                             ref_picture, pix_op,
3689                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3690
3691                 // after put we make avg of the same block
3692                 pix_op=s->dsp.avg_pixels_tab;
3693
3694                 //opposite parity is always in the same frame if this is second field
3695                 if(!s->first_field){
3696                     ref_picture = s->current_picture_ptr->data;
3697                 }
3698             }
3699         }
3700     break;
3701     default: assert(0);
3702     }
3703 }
3704
3705 /**
3706  * motion compensation of a single macroblock
3707  * @param s context
3708  * @param dest_y luma destination pointer
3709  * @param dest_cb chroma cb/u destination pointer
3710  * @param dest_cr chroma cr/v destination pointer
3711  * @param dir direction (0->forward, 1->backward)
3712  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3713  * @param pic_op halfpel motion compensation function (average or put normally)
3714  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3715  */
3716 static inline void MPV_motion_lowres(MpegEncContext *s,
3717                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3718                               int dir, uint8_t **ref_picture,
3719                               h264_chroma_mc_func *pix_op)
3720 {
3721     int mx, my;
3722     int mb_x, mb_y, i;
3723     const int lowres= s->avctx->lowres;
3724     const int block_s= 8>>lowres;
3725
3726     mb_x = s->mb_x;
3727     mb_y = s->mb_y;
3728
3729     switch(s->mv_type) {
3730     case MV_TYPE_16X16:
3731         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3732                     0, 0, 0,
3733                     ref_picture, pix_op,
3734                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3735         break;
3736     case MV_TYPE_8X8:
3737         mx = 0;
3738         my = 0;
3739             for(i=0;i<4;i++) {
3740                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3741                             ref_picture[0], 0, 0,
3742                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3743                             s->width, s->height, s->linesize,
3744                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3745                             block_s, block_s, pix_op,
3746                             s->mv[dir][i][0], s->mv[dir][i][1]);
3747
3748                 mx += s->mv[dir][i][0];
3749                 my += s->mv[dir][i][1];
3750             }
3751
3752         if(!(s->flags&CODEC_FLAG_GRAY))
3753             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3754         break;
3755     case MV_TYPE_FIELD:
3756         if (s->picture_structure == PICT_FRAME) {
3757             /* top field */
3758             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3759                         1, 0, s->field_select[dir][0],
3760                         ref_picture, pix_op,
3761                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3762             /* bottom field */
3763             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3764                         1, 1, s->field_select[dir][1],
3765                         ref_picture, pix_op,
3766                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3767         } else {
3768             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3769                 ref_picture= s->current_picture_ptr->data;
3770             }
3771
3772             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3773                         0, 0, s->field_select[dir][0],
3774                         ref_picture, pix_op,
3775                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3776         }
3777         break;
3778     case MV_TYPE_16X8:
3779         for(i=0; i<2; i++){
3780             uint8_t ** ref2picture;
3781
3782             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3783                 ref2picture= ref_picture;
3784             }else{
3785                 ref2picture= s->current_picture_ptr->data;
3786             }
3787
3788             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3789                         0, 0, s->field_select[dir][i],
3790                         ref2picture, pix_op,
3791                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3792
3793             dest_y += 2*block_s*s->linesize;
3794             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3795             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3796         }
3797         break;
3798     case MV_TYPE_DMV:
3799         if(s->picture_structure == PICT_FRAME){
3800             for(i=0; i<2; i++){
3801                 int j;
3802                 for(j=0; j<2; j++){
3803                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3804                                 1, j, j^i,
3805                                 ref_picture, pix_op,
3806                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3807                 }
3808                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3809             }
3810         }else{
3811             for(i=0; i<2; i++){
3812                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3813                             0, 0, s->picture_structure != i+1,
3814                             ref_picture, pix_op,
3815                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3816
3817                 // after put we make avg of the same block
3818                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3819
3820                 //opposite parity is always in the same frame if this is second field
3821                 if(!s->first_field){
3822                     ref_picture = s->current_picture_ptr->data;
3823                 }
3824             }
3825         }
3826     break;
3827     default: assert(0);
3828     }
3829 }
3830
/**
 * Dequantize an intra block[] and inverse-transform it, overwriting dest[].
 * @param i         block index, forwarded to the dequantizer
 * @param line_size stride of dest in bytes
 * @param qscale    quantizer scale used for the dequantization
 */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
3838
/**
 * Inverse-transform block[] and add the result to dest[].
 * Skipped entirely when the block has no nonzero coefficients
 * (block_last_index < 0).
 */
static inline void add_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size)
{
    if (s->block_last_index[i] >= 0) {
        s->dsp.idct_add (dest, line_size, block);
    }
}
3847
/**
 * Dequantize an inter block[], inverse-transform it and add the result
 * to dest[]. Skipped when the block has no nonzero coefficients.
 * @param qscale quantizer scale used for the inter dequantization
 */
static inline void add_dequant_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    if (s->block_last_index[i] >= 0) {
        s->dct_unquantize_inter(s, block, i, qscale);

        s->dsp.idct_add (dest, line_size, block);
    }
}
3857
3858 /**
3859  * cleans dc, ac, coded_block for the current non intra MB
3860  */
3861 void ff_clean_intra_table_entries(MpegEncContext *s)
3862 {
3863     int wrap = s->b8_stride;
3864     int xy = s->block_index[0];
3865
3866     s->dc_val[0][xy           ] =
3867     s->dc_val[0][xy + 1       ] =
3868     s->dc_val[0][xy     + wrap] =
3869     s->dc_val[0][xy + 1 + wrap] = 1024;
3870     /* ac pred */
3871     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3872     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3873     if (s->msmpeg4_version>=3) {
3874         s->coded_block[xy           ] =
3875         s->coded_block[xy + 1       ] =
3876         s->coded_block[xy     + wrap] =
3877         s->coded_block[xy + 1 + wrap] = 0;
3878     }
3879     /* chroma */
3880     wrap = s->mb_stride;
3881     xy = s->mb_x + s->mb_y * wrap;
3882     s->dc_val[1][xy] =
3883     s->dc_val[2][xy] = 1024;
3884     /* ac pred */
3885     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3886     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3887
3888     s->mbintra_table[xy]= 0;
3889 }
3890
/**
 * Generic function called after a macroblock has been parsed by the
 * decoder or after it has been encoded by the encoder: performs motion
 * compensation and adds/puts the IDCT output into the destination planes.
 *
 * Important variables used:
 * s->mb_intra : true if intra macroblock
 * s->mv_dir   : motion vector direction
 * s->mv_type  : motion vector type
 * s->mv       : motion vector
 * s->interlaced_dct : true if interlaced dct used (mpeg2)
 *
 * lowres_flag is a compile-time constant at each call site (the function is
 * av_always_inline), selecting the low-resolution decoding path.
 */
static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    /* with XvMC acceleration the hardware consumes the blocks directly */
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* interlaced DCT interleaves the two fields: double the stride and
           start the lower blocks one line down instead of block_size lines */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* not directly writable: reconstruct into the scratchpad and
               copy the result out after the IDCT (see !readable below) */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    op_qpix= s->me.qpel_put;
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                    }
                    /* forward prediction first; if backward follows it is
                       averaged on top (bidirectional prediction) */
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->me.qpel_avg;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                /* codecs whose blocks are still quantized at this point:
                   dequantize while adding */
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if (s->chroma_y_shift){
                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_linesize >>= 1;
                        dct_offset >>=1;
                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else if (ENABLE_WMV2) {
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_offset >>=1;
                        dct_linesize >>=1;
                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            }else{
                /* blocks are already dequantized (mpeg1/2 decoding path) */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        /* copy the scratchpad reconstruction to the real destination */
        if(!readable){
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
4141
4142 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4143     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4144     else                  MPV_decode_mb_internal(s, block, 0);
4145 }
4146
4147 #ifdef CONFIG_ENCODERS
4148
4149 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4150 {
4151     static const char tab[64]=
4152         {3,2,2,1,1,1,1,1,
4153          1,1,1,1,1,1,1,1,
4154          1,1,1,1,1,1,1,1,
4155          0,0,0,0,0,0,0,0,
4156          0,0,0,0,0,0,0,0,
4157          0,0,0,0,0,0,0,0,
4158          0,0,0,0,0,0,0,0,
4159          0,0,0,0,0,0,0,0};
4160     int score=0;
4161     int run=0;
4162     int i;
4163     DCTELEM *block= s->block[n];
4164     const int last_index= s->block_last_index[n];
4165     int skip_dc;
4166
4167     if(threshold<0){
4168         skip_dc=0;
4169         threshold= -threshold;
4170     }else
4171         skip_dc=1;
4172
4173     /* are all which we could set to zero are allready zero? */
4174     if(last_index<=skip_dc - 1) return;
4175
4176     for(i=0; i<=last_index; i++){
4177         const int j = s->intra_scantable.permutated[i];
4178         const int level = FFABS(block[j]);
4179         if(level==1){
4180             if(skip_dc && i==0) continue;
4181             score+= tab[run];
4182             run=0;
4183         }else if(level>1){
4184             return;
4185         }else{
4186             run++;
4187         }
4188     }
4189     if(score >= threshold) return;
4190     for(i=skip_dc; i<=last_index; i++){
4191         const int j = s->intra_scantable.permutated[i];
4192         block[j]=0;
4193     }
4194     if(block[0]) s->block_last_index[n]= 0;
4195     else         s->block_last_index[n]= -1;
4196 }
4197
4198 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4199 {
4200     int i;
4201     const int maxlevel= s->max_qcoeff;
4202     const int minlevel= s->min_qcoeff;
4203     int overflow=0;
4204
4205     if(s->mb_intra){
4206         i=1; //skip clipping of intra dc
4207     }else
4208         i=0;
4209
4210     for(;i<=last_index; i++){
4211         const int j= s->intra_scantable.permutated[i];
4212         int level = block[j];
4213
4214         if     (level>maxlevel){
4215             level=maxlevel;
4216             overflow++;
4217         }else if(level<minlevel){
4218             level=minlevel;
4219             overflow++;
4220         }
4221
4222         block[j]= level;
4223     }
4224
4225     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4226         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4227 }
4228
4229 #endif //CONFIG_ENCODERS
4230
4231 /**
4232  *
4233  * @param h is the normal height, this will be reduced automatically if needed for the last row
4234  */
4235 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4236     if (s->avctx->draw_horiz_band) {
4237         AVFrame *src;
4238         int offset[4];
4239
4240         if(s->picture_structure != PICT_FRAME){
4241             h <<= 1;
4242             y <<= 1;
4243             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4244         }
4245
4246         h= FFMIN(h, s->avctx->height - y);
4247
4248         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4249             src= (AVFrame*)s->current_picture_ptr;
4250         else if(s->last_picture_ptr)
4251             src= (AVFrame*)s->last_picture_ptr;
4252         else
4253             return;
4254
4255         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4256             offset[0]=
4257             offset[1]=
4258             offset[2]=
4259             offset[3]= 0;
4260         }else{
4261             offset[0]= y * s->linesize;;
4262             offset[1]=
4263             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4264             offset[3]= 0;
4265         }
4266
4267         emms_c();
4268
4269         s->avctx->draw_horiz_band(s->avctx, src, offset,
4270                                   y, s->picture_structure, h);
4271     }
4272 }
4273
/**
 * Initialize block_index[] and the dest[] plane pointers for the current
 * macroblock position (s->mb_x, s->mb_y).
 */
void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
    const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
    const int uvlinesize= s->current_picture.linesize[1];
    /* log2 of the macroblock size in pixels; lowres halves it per level */
    const int mb_size= 4 - s->avctx->lowres;

    /* indices of the four luma 8x8 blocks on the b8 grid; the -2/-1 offsets
       place them one macroblock to the left — NOTE(review): presumably they
       are advanced per-MB by the caller, confirm against the callers */
    s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
    s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
    s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
    s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
    /* chroma block indices live past the luma area of the shared tables */
    s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    //block_index is not used by mpeg2, so it is not affected by chroma_format

    s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
    s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
    s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));

    /* add the vertical MB offset, except for the B-frame draw_horiz_band
       case where the row base is handled elsewhere */
    if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
    {
        s->dest[0] += s->mb_y *   linesize << mb_size;
        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
    }
}
4298
4299 #ifdef CONFIG_ENCODERS
4300
4301 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4302     int x, y;
4303 //FIXME optimize
4304     for(y=0; y<8; y++){
4305         for(x=0; x<8; x++){
4306             int x2, y2;
4307             int sum=0;
4308             int sqr=0;
4309             int count=0;
4310
4311             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4312                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4313                     int v= ptr[x2 + y2*stride];
4314                     sum += v;
4315                     sqr += v*v;
4316                     count++;
4317                 }
4318             }
4319             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4320         }
4321     }
4322 }
4323
4324 static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4325 {
4326     int16_t weight[8][64];
4327     DCTELEM orig[8][64];
4328     const int mb_x= s->mb_x;
4329     const int mb_y= s->mb_y;
4330     int i;
4331     int skip_dct[8];
4332     int dct_offset   = s->linesize*8; //default for progressive frames
4333     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4334     int wrap_y, wrap_c;
4335
4336     for(i=0; i<mb_block_count; i++) skip_dct[i]=s->skipdct;
4337
4338     if(s->adaptive_quant){
4339         const int last_qp= s->qscale;
4340         const int mb_xy= mb_x + mb_y*s->mb_stride;
4341
4342         s->lambda= s->lambda_table[mb_xy];
4343         update_qscale(s);
4344
4345         if(!(s->flags&CODEC_FLAG_QP_RD)){
4346             s->qscale= s->current_picture_ptr->qscale_table[mb_xy];
4347             s->dquant= s->qscale - last_qp;
4348
4349             if(s->out_format==FMT_H263){
4350                 s->dquant= av_clip(s->dquant, -2, 2);
4351
4352                 if(s->codec_id==CODEC_ID_MPEG4){
4353                     if(!s->mb_intra){
4354                         if(s->pict_type == B_TYPE){
4355                             if(s->dquant&1 || s->mv_dir&MV_DIRECT)
4356                                 s->dquant= 0;
4357                         }
4358                         if(s->mv_type==MV_TYPE_8X8)
4359                             s->dquant=0;
4360                     }
4361                 }
4362             }
4363         }
4364         ff_set_qscale(s, last_qp + s->dquant);
4365     }else if(s->flags&CODEC_FLAG_QP_RD)
4366         ff_set_qscale(s, s->qscale + s->dquant);
4367
4368     wrap_y = s->linesize;
4369     wrap_c = s->uvlinesize;
4370     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4371     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4372     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4373
4374     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4375         uint8_t *ebuf= s->edge_emu_buffer + 32;
4376         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4377         ptr_y= ebuf;
4378         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4379         ptr_cb= ebuf+18*wrap_y;
4380         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4381         ptr_cr= ebuf+18*wrap_y+8;
4382     }
4383
4384     if (s->mb_intra) {
4385         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4386             int progressive_score, interlaced_score;
4387
4388             s->interlaced_dct=0;
4389             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4390                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4391
4392             if(progressive_score > 0){
4393                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4394                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4395                 if(progressive_score > interlaced_score){
4396                     s->interlaced_dct=1;
4397
4398                     dct_offset= wrap_y;
4399                     wrap_y<<=1;
4400                     if (s->chroma_format == CHROMA_422)
4401                         wrap_c<<=1;
4402                 }
4403             }
4404         }
4405
4406         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4407         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4408         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4409         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4410
4411         if(s->flags&CODEC_FLAG_GRAY){
4412             skip_dct[4]= 1;
4413             skip_dct[5]= 1;
4414         }else{
4415             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4416             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4417             if(!s->chroma_y_shift){ /* 422 */
4418                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4419                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4420             }
4421         }
4422     }else{
4423         op_pixels_func (*op_pix)[4];
4424         qpel_mc_func (*op_qpix)[16];
4425         uint8_t *dest_y, *dest_cb, *dest_cr;
4426
4427         dest_y  = s->dest[0];
4428         dest_cb = s->dest[1];
4429         dest_cr = s->dest[2];
4430
4431         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4432             op_pix = s->dsp.put_pixels_tab;
4433             op_qpix= s->dsp.put_qpel_pixels_tab;
4434         }else{
4435             op_pix = s->dsp.put_no_rnd_pixels_tab;
4436             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4437         }
4438
4439         if (s->mv_dir & MV_DIR_FORWARD) {
4440             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4441             op_pix = s->dsp.avg_pixels_tab;
4442             op_qpix= s->dsp.avg_qpel_pixels_tab;
4443         }
4444         if (s->mv_dir & MV_DIR_BACKWARD) {
4445             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4446         }
4447
4448         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4449             int progressive_score, interlaced_score;
4450
4451             s->interlaced_dct=0;
4452             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4453                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4454
4455             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4456
4457             if(progressive_score>0){
4458                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4459                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4460
4461                 if(progressive_score > interlaced_score){
4462                     s->interlaced_dct=1;
4463
4464                     dct_offset= wrap_y;
4465                     wrap_y<<=1;
4466                     if (s->chroma_format == CHROMA_422)
4467                         wrap_c<<=1;
4468                 }
4469             }
4470         }
4471
4472         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4473         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4474         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4475         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4476
4477         if(s->flags&CODEC_FLAG_GRAY){
4478             skip_dct[4]= 1;
4479             skip_dct[5]= 1;
4480         }else{
4481             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4482             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4483             if(!s->chroma_y_shift){ /* 422 */
4484                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4485                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4486             }
4487         }
4488         /* pre quantization */
4489         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4490             //FIXME optimize
4491             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4492             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4493             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4494             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4495             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4496             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4497             if(!s->chroma_y_shift){ /* 422 */
4498                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4499                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4500             }
4501         }
4502     }
4503
4504     if(s->avctx->quantizer_noise_shaping){
4505         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4506         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4507         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4508         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4509         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4510         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4511         if(!s->chroma_y_shift){ /* 422 */
4512             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4513             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4514         }
4515         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4516     }
4517
4518     /* DCT & quantize */
4519     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4520     {
4521         for(i=0;i<mb_block_count;i++) {
4522             if(!skip_dct[i]){
4523                 int overflow;
4524                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4525             // FIXME we could decide to change to quantizer instead of clipping
4526             // JS: I don't think that would be a good idea it could lower quality instead
4527             //     of improve it. Just INTRADC clipping deserves changes in quantizer
4528                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4529             }else
4530                 s->block_last_index[i]= -1;
4531         }
4532         if(s->avctx->quantizer_noise_shaping){
4533             for(i=0;i<mb_block_count;i++) {
4534                 if(!skip_dct[i]){
4535                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4536                 }
4537             }
4538         }
4539
4540         if(s->luma_elim_threshold && !s->mb_intra)
4541             for(i=0; i<4; i++)
4542                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4543         if(s->chroma_elim_threshold && !s->mb_intra)
4544             for(i=4; i<mb_block_count; i++)
4545                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4546
4547         if(s->flags & CODEC_FLAG_CBP_RD){
4548             for(i=0;i<mb_block_count;i++) {
4549                 if(s->block_last_index[i] == -1)
4550                     s->coded_score[i]= INT_MAX/256;
4551             }
4552         }
4553     }
4554
4555     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4556         s->block_last_index[4]=
4557         s->block_last_index[5]= 0;
4558         s->block[4][0]=
4559         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4560     }
4561
4562     //non c quantize code returns incorrect block_last_index FIXME
4563     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4564         for(i=0; i<mb_block_count; i++){
4565             int j;
4566             if(s->block_last_index[i]>0){
4567                 for(j=63; j>0; j--){
4568                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4569                 }
4570                 s->block_last_index[i]= j;
4571             }
4572         }
4573     }
4574
4575     /* huffman encode */
4576     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4577     case CODEC_ID_MPEG1VIDEO:
4578     case CODEC_ID_MPEG2VIDEO:
4579         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4580     case CODEC_ID_MPEG4:
4581         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4582     case CODEC_ID_MSMPEG4V2:
4583     case CODEC_ID_MSMPEG4V3:
4584     case CODEC_ID_WMV1:
4585         if (ENABLE_MSMPEG4_ENCODER)
4586             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
4587         break;
4588     case CODEC_ID_WMV2:
4589         if (ENABLE_WMV2_ENCODER)
4590             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
4591         break;
4592     case CODEC_ID_H261:
4593         if (ENABLE_H261_ENCODER)
4594             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
4595         break;
4596     case CODEC_ID_H263:
4597     case CODEC_ID_H263P:
4598     case CODEC_ID_FLV1:
4599     case CODEC_ID_RV10:
4600     case CODEC_ID_RV20:
4601         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4602     case CODEC_ID_MJPEG:
4603         if (ENABLE_MJPEG_ENCODER)
4604             ff_mjpeg_encode_mb(s, s->block);
4605         break;
4606     default:
4607         assert(0);
4608     }
4609 }
4610
4611 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4612 {
4613     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4614     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4615 }
4616
4617 #endif //CONFIG_ENCODERS
4618
4619 void ff_mpeg_flush(AVCodecContext *avctx){
4620     int i;
4621     MpegEncContext *s = avctx->priv_data;
4622
4623     if(s==NULL || s->picture==NULL)
4624         return;
4625
4626     for(i=0; i<MAX_PICTURE_COUNT; i++){
4627        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4628                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4629         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4630     }
4631     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4632
4633     s->mb_x= s->mb_y= 0;
4634
4635     s->parse_context.state= -1;
4636     s->parse_context.frame_start_found= 0;
4637     s->parse_context.overread= 0;
4638     s->parse_context.overread_index= 0;
4639     s->parse_context.index= 0;
4640     s->parse_context.last_index= 0;
4641     s->bitstream_buffer_size=0;
4642     s->pp_time=0;
4643 }
4644
4645 #ifdef CONFIG_ENCODERS
4646 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4647 {
4648     const uint16_t *srcw= (uint16_t*)src;
4649     int words= length>>4;
4650     int bits= length&15;
4651     int i;
4652
4653     if(length==0) return;
4654
4655     if(words < 16){
4656         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4657     }else if(put_bits_count(pb)&7){
4658         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4659     }else{
4660         for(i=0; put_bits_count(pb)&31; i++)
4661             put_bits(pb, 8, src[i]);
4662         flush_put_bits(pb);
4663         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4664         skip_put_bytes(pb, 2*words-i);
4665     }
4666
4667     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4668 }
4669
4670 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4671     int i;
4672
4673     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4674
4675     /* mpeg1 */
4676     d->mb_skip_run= s->mb_skip_run;
4677     for(i=0; i<3; i++)
4678         d->last_dc[i]= s->last_dc[i];
4679
4680     /* statistics */
4681     d->mv_bits= s->mv_bits;
4682     d->i_tex_bits= s->i_tex_bits;
4683     d->p_tex_bits= s->p_tex_bits;
4684     d->i_count= s->i_count;
4685     d->f_count= s->f_count;
4686     d->b_count= s->b_count;
4687     d->skip_count= s->skip_count;
4688     d->misc_bits= s->misc_bits;
4689     d->last_bits= 0;
4690
4691     d->mb_skipped= 0;
4692     d->qscale= s->qscale;
4693     d->dquant= s->dquant;
4694 }
4695
/**
 * Copy encoder state from s into d after a macroblock trial encode.
 * Superset of copy_context_before_encode(): in addition to prediction
 * state and statistics it carries the coding decision (mb_intra, mv_type,
 * mv_dir), the bit writers and the coefficient blocks, so d ends up
 * reflecting the just-encoded trial.
 */
static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
    int i;

    memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?

    /* mpeg1 */
    d->mb_skip_run= s->mb_skip_run;
    for(i=0; i<3; i++)
        d->last_dc[i]= s->last_dc[i];

    /* statistics */
    d->mv_bits= s->mv_bits;
    d->i_tex_bits= s->i_tex_bits;
    d->p_tex_bits= s->p_tex_bits;
    d->i_count= s->i_count;
    d->f_count= s->f_count;
    d->b_count= s->b_count;
    d->skip_count= s->skip_count;
    d->misc_bits= s->misc_bits;

    /* coding decision of this trial */
    d->mb_intra= s->mb_intra;
    d->mb_skipped= s->mb_skipped;
    d->mv_type= s->mv_type;
    d->mv_dir= s->mv_dir;
    d->pb= s->pb;
    if(s->data_partitioning){
        d->pb2= s->pb2;
        d->tex_pb= s->tex_pb;
    }
    d->block= s->block;
    for(i=0; i<8; i++)
        d->block_last_index[i]= s->block_last_index[i];
    d->interlaced_dct= s->interlaced_dct;
    d->qscale= s->qscale;
}
4732
/**
 * Trial-encode one macroblock with the given prediction mode ('type') and
 * keep the result if its score beats the best candidate so far.
 * Score is the bit count, or a full rate-distortion cost when
 * mb_decision == FF_MB_DECISION_RD. Two bit buffers / coefficient-block
 * sets are alternated via *next_block so a losing trial never overwrites
 * the currently-best one.
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int score;
    uint8_t *dest_backup[3];

    copy_context_before_encode(s, backup, type);

    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }

    if(*next_block){
        /* reconstruct into a scratchpad so the best reconstruction held in
           s->dest is not clobbered by this trial */
        memcpy(dest_backup, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);

    /* rate term: bits produced by this trial */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }

    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        /* distortion term: decode the MB and add its SSE */
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }

    if(*next_block){
        memcpy(s->dest, dest_backup, sizeof(s->dest));
    }

    if(score<*dmin){
        *dmin= score;
        *next_block^=1;   /* winner stays; next trial uses the other buffer set */

        copy_context_after_encode(best, s, type);
    }
}
4783
4784 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4785     uint32_t *sq = ff_squareTbl + 256;
4786     int acc=0;
4787     int x,y;
4788
4789     if(w==16 && h==16)
4790         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4791     else if(w==8 && h==8)
4792         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4793
4794     for(y=0; y<h; y++){
4795         for(x=0; x<w; x++){
4796             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4797         }
4798     }
4799
4800     assert(acc>=0);
4801
4802     return acc;
4803 }
4804
/**
 * Sum of squared errors between the source picture and the current
 * reconstruction for the macroblock at (s->mb_x, s->mb_y): luma plus both
 * chroma planes (chroma at half resolution in each direction).
 * Full-size macroblocks use the dsputil SSE/NSSE routines; macroblocks
 * clipped by the picture border fall back to the generic sse().
 */
static int sse_mb(MpegEncContext *s){
    int w= 16;
    int h= 16;

    /* clip the MB extent at the right/bottom picture edge */
    if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
    if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;

    if(w==16 && h==16)
      if(s->avctx->mb_cmp == FF_CMP_NSSE){
        /* noise-preserving SSE variant; needs the context as first arg */
        return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
               +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
               +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
      }else{
        return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
               +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
               +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
      }
    else
        return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
               +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
               +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
}
4827
4828 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4829     MpegEncContext *s= arg;
4830
4831
4832     s->me.pre_pass=1;
4833     s->me.dia_size= s->avctx->pre_dia_size;
4834     s->first_slice_line=1;
4835     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4836         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4837             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4838         }
4839         s->first_slice_line=0;
4840     }
4841
4842     s->me.pre_pass=0;
4843
4844     return 0;
4845 }
4846
4847 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4848     MpegEncContext *s= arg;
4849
4850     ff_check_alignment();
4851
4852     s->me.dia_size= s->avctx->dia_size;
4853     s->first_slice_line=1;
4854     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4855         s->mb_x=0; //for block init below
4856         ff_init_block_index(s);
4857         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4858             s->block_index[0]+=2;
4859             s->block_index[1]+=2;
4860             s->block_index[2]+=2;
4861             s->block_index[3]+=2;
4862
4863             /* compute motion vector & mb_type and store in context */
4864             if(s->pict_type==B_TYPE)
4865                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4866             else
4867                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4868         }
4869         s->first_slice_line=0;
4870     }
4871     return 0;
4872 }
4873
4874 static int mb_var_thread(AVCodecContext *c, void *arg){
4875     MpegEncContext *s= arg;
4876     int mb_x, mb_y;
4877
4878     ff_check_alignment();
4879
4880     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4881         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4882             int xx = mb_x * 16;
4883             int yy = mb_y * 16;
4884             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4885             int varc;
4886             int sum = s->dsp.pix_sum(pix, s->linesize);
4887
4888             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4889
4890             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4891             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4892             s->me.mb_var_sum_temp    += varc;
4893         }
4894     }
4895     return 0;
4896 }
4897
/**
 * Terminate the current slice/packet: merge MPEG-4 data partitions if
 * enabled, emit codec-specific stuffing, then byte-align and flush the
 * bit writer.
 */
static void write_slice_end(MpegEncContext *s){
    if(s->codec_id==CODEC_ID_MPEG4){
        if(s->partitioned_frame){
            ff_mpeg4_merge_partitions(s);
        }

        ff_mpeg4_stuffing(&s->pb);
    }else if(ENABLE_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
        ff_mjpeg_encode_stuffing(&s->pb);
    }

    align_put_bits(&s->pb);
    flush_put_bits(&s->pb);

    /* for 2-pass logging, stuffing/alignment bits are accounted as misc */
    if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
        s->misc_bits+= get_bits_diff(s);
}
4915
4916 static int encode_thread(AVCodecContext *c, void *arg){
4917     MpegEncContext *s= arg;
4918     int mb_x, mb_y, pdif = 0;
4919     int i, j;
4920     MpegEncContext best_s, backup_s;
4921     uint8_t bit_buf[2][MAX_MB_BYTES];
4922     uint8_t bit_buf2[2][MAX_MB_BYTES];
4923     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4924     PutBitContext pb[2], pb2[2], tex_pb[2];
4925 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4926
4927     ff_check_alignment();
4928
4929     for(i=0; i<2; i++){
4930         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4931         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4932         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4933     }
4934
4935     s->last_bits= put_bits_count(&s->pb);
4936     s->mv_bits=0;
4937     s->misc_bits=0;
4938     s->i_tex_bits=0;
4939     s->p_tex_bits=0;
4940     s->i_count=0;
4941     s->f_count=0;
4942     s->b_count=0;
4943     s->skip_count=0;
4944
4945     for(i=0; i<3; i++){
4946         /* init last dc values */
4947         /* note: quant matrix value (8) is implied here */
4948         s->last_dc[i] = 128 << s->intra_dc_precision;
4949
4950         s->current_picture.error[i] = 0;
4951     }
4952     s->mb_skip_run = 0;
4953     memset(s->last_mv, 0, sizeof(s->last_mv));
4954
4955     s->last_mv_dir = 0;
4956
4957     switch(s->codec_id){
4958     case CODEC_ID_H263:
4959     case CODEC_ID_H263P:
4960     case CODEC_ID_FLV1:
4961         s->gob_index = ff_h263_get_gob_height(s);
4962         break;
4963     case CODEC_ID_MPEG4:
4964         if(s->partitioned_frame)
4965             ff_mpeg4_init_partitions(s);
4966         break;
4967     }
4968
4969     s->resync_mb_x=0;
4970     s->resync_mb_y=0;
4971     s->first_slice_line = 1;
4972     s->ptr_lastgob = s->pb.buf;
4973     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4974 //    printf("row %d at %X\n", s->mb_y, (int)s);
4975         s->mb_x=0;
4976         s->mb_y= mb_y;
4977
4978         ff_set_qscale(s, s->qscale);
4979         ff_init_block_index(s);
4980
4981         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4982             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4983             int mb_type= s->mb_type[xy];
4984 //            int d;
4985             int dmin= INT_MAX;
4986             int dir;
4987
4988             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4989                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4990                 return -1;
4991             }
4992             if(s->data_partitioning){
4993                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4994                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4995                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4996                     return -1;
4997                 }
4998             }
4999
5000             s->mb_x = mb_x;
5001             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
5002             ff_update_block_index(s);
5003
5004             if(ENABLE_H261_ENCODER && s->codec_id == CODEC_ID_H261){
5005                 ff_h261_reorder_mb_index(s);
5006                 xy= s->mb_y*s->mb_stride + s->mb_x;
5007                 mb_type= s->mb_type[xy];
5008             }
5009
5010             /* write gob / video packet header  */
5011             if(s->rtp_mode){
5012                 int current_packet_size, is_gob_start;
5013
5014                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
5015
5016                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
5017
5018                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
5019
5020                 switch(s->codec_id){
5021                 case CODEC_ID_H263:
5022                 case CODEC_ID_H263P:
5023                     if(!s->h263_slice_structured)
5024                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
5025                     break;
5026                 case CODEC_ID_MPEG2VIDEO:
5027                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
5028                 case CODEC_ID_MPEG1VIDEO:
5029                     if(s->mb_skip_run) is_gob_start=0;
5030                     break;
5031                 }
5032
5033                 if(is_gob_start){
5034                     if(s->start_mb_y != mb_y || mb_x!=0){
5035                         write_slice_end(s);
5036
5037                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
5038                             ff_mpeg4_init_partitions(s);
5039                         }
5040                     }
5041
5042                     assert((put_bits_count(&s->pb)&7) == 0);
5043                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
5044
5045                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
5046                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
5047                         int d= 100 / s->avctx->error_rate;
5048                         if(r % d == 0){
5049                             current_packet_size=0;
5050 #ifndef ALT_BITSTREAM_WRITER
5051                             s->pb.buf_ptr= s->ptr_lastgob;
5052 #endif
5053                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
5054                         }
5055                     }
5056
5057                     if (s->avctx->rtp_callback){
5058                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
5059                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
5060                     }
5061
5062                     switch(s->codec_id){
5063                     case CODEC_ID_MPEG4:
5064                         ff_mpeg4_encode_video_packet_header(s);
5065                         ff_mpeg4_clean_buffers(s);
5066                     break;
5067                     case CODEC_ID_MPEG1VIDEO:
5068                     case CODEC_ID_MPEG2VIDEO:
5069                         ff_mpeg1_encode_slice_header(s);
5070                         ff_mpeg1_clean_buffers(s);
5071                     break;
5072                     case CODEC_ID_H263:
5073                     case CODEC_ID_H263P:
5074                         h263_encode_gob_header(s, mb_y);
5075                     break;
5076                     }
5077
5078                     if(s->flags&CODEC_FLAG_PASS1){
5079                         int bits= put_bits_count(&s->pb);
5080                         s->misc_bits+= bits - s->last_bits;
5081                         s->last_bits= bits;
5082                     }
5083
5084                     s->ptr_lastgob += current_packet_size;
5085                     s->first_slice_line=1;
5086                     s->resync_mb_x=mb_x;
5087                     s->resync_mb_y=mb_y;
5088                 }
5089             }
5090
5091             if(  (s->resync_mb_x   == s->mb_x)
5092                && s->resync_mb_y+1 == s->mb_y){
5093                 s->first_slice_line=0;
5094             }
5095
5096             s->mb_skipped=0;
5097             s->dquant=0; //only for QP_RD
5098
5099             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5100                 int next_block=0;
5101                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5102
5103                 copy_context_before_encode(&backup_s, s, -1);
5104                 backup_s.pb= s->pb;
5105                 best_s.data_partitioning= s->data_partitioning;
5106                 best_s.partitioned_frame= s->partitioned_frame;
5107                 if(s->data_partitioning){
5108                     backup_s.pb2= s->pb2;
5109                     backup_s.tex_pb= s->tex_pb;
5110                 }
5111
5112                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5113                     s->mv_dir = MV_DIR_FORWARD;
5114                     s->mv_type = MV_TYPE_16X16;
5115                     s->mb_intra= 0;
5116                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5117                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5118                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5119                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5120                 }
5121                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5122                     s->mv_dir = MV_DIR_FORWARD;
5123                     s->mv_type = MV_TYPE_FIELD;
5124                     s->mb_intra= 0;
5125                     for(i=0; i<2; i++){
5126                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5127                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5128                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5129                     }
5130                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5131                                  &dmin, &next_block, 0, 0);
5132                 }
5133                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5134                     s->mv_dir = MV_DIR_FORWARD;
5135                     s->mv_type = MV_TYPE_16X16;
5136                     s->mb_intra= 0;
5137                     s->mv[0][0][0] = 0;
5138                     s->mv[0][0][1] = 0;
5139                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5140                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5141                 }
5142                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5143                     s->mv_dir = MV_DIR_FORWARD;
5144                     s->mv_type = MV_TYPE_8X8;
5145                     s->mb_intra= 0;
5146                     for(i=0; i<4; i++){
5147                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5148                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5149                     }
5150                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5151                                  &dmin, &next_block, 0, 0);
5152                 }
5153                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5154                     s->mv_dir = MV_DIR_FORWARD;
5155                     s->mv_type = MV_TYPE_16X16;
5156                     s->mb_intra= 0;
5157                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5158                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5159                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5160                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5161                 }
5162                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5163                     s->mv_dir = MV_DIR_BACKWARD;
5164                     s->mv_type = MV_TYPE_16X16;
5165                     s->mb_intra= 0;
5166                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5167                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5168                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5169                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5170                 }
5171                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5172                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5173                     s->mv_type = MV_TYPE_16X16;
5174                     s->mb_intra= 0;
5175                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5176                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5177                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5178                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5179                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5180                                  &dmin, &next_block, 0, 0);
5181                 }
5182                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5183                     s->mv_dir = MV_DIR_FORWARD;
5184                     s->mv_type = MV_TYPE_FIELD;
5185                     s->mb_intra= 0;
5186                     for(i=0; i<2; i++){
5187                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5188                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5189                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5190                     }
5191                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5192                                  &dmin, &next_block, 0, 0);
5193                 }
5194                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5195                     s->mv_dir = MV_DIR_BACKWARD;
5196                     s->mv_type = MV_TYPE_FIELD;
5197                     s->mb_intra= 0;
5198                     for(i=0; i<2; i++){
5199                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5200                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5201                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5202                     }
5203                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5204                                  &dmin, &next_block, 0, 0);
5205                 }
5206                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5207                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5208                     s->mv_type = MV_TYPE_FIELD;
5209                     s->mb_intra= 0;
5210                     for(dir=0; dir<2; dir++){
5211                         for(i=0; i<2; i++){
5212                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5213                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5214                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5215                         }
5216                     }
5217                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5218                                  &dmin, &next_block, 0, 0);
5219                 }
5220                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5221                     s->mv_dir = 0;
5222                     s->mv_type = MV_TYPE_16X16;
5223                     s->mb_intra= 1;
5224                     s->mv[0][0][0] = 0;
5225                     s->mv[0][0][1] = 0;
5226                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5227                                  &dmin, &next_block, 0, 0);
5228                     if(s->h263_pred || s->h263_aic){
5229                         if(best_s.mb_intra)
5230                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5231                         else
5232                             ff_clean_intra_table_entries(s); //old mode?
5233                     }
5234                 }
5235
5236                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
5237                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
5238                         const int last_qp= backup_s.qscale;
5239                         int qpi, qp, dc[6];
5240                         DCTELEM ac[6][16];
5241                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5242                         static const int dquant_tab[4]={-1,1,-2,2};
5243
5244                         assert(backup_s.dquant == 0);
5245
5246                         //FIXME intra
5247                         s->mv_dir= best_s.mv_dir;
5248                         s->mv_type = MV_TYPE_16X16;
5249                         s->mb_intra= best_s.mb_intra;
5250                         s->mv[0][0][0] = best_s.mv[0][0][0];
5251                         s->mv[0][0][1] = best_s.mv[0][0][1];
5252                         s->mv[1][0][0] = best_s.mv[1][0][0];
5253                         s->mv[1][0][1] = best_s.mv[1][0][1];
5254
5255                         qpi = s->pict_type == B_TYPE ? 2 : 0;
5256                         for(; qpi<4; qpi++){
5257                             int dquant= dquant_tab[qpi];
5258                             qp= last_qp + dquant;
5259                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5260                                 continue;
5261                             backup_s.dquant= dquant;
5262                             if(s->mb_intra && s->dc_val[0]){
5263                                 for(i=0; i<6; i++){
5264                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5265                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5266                                 }
5267                             }
5268
5269                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5270                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5271                             if(best_s.qscale != qp){
5272                                 if(s->mb_intra && s->dc_val[0]){
5273                                     for(i=0; i<6; i++){
5274                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5275                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5276                                     }
5277                                 }
5278                             }
5279                         }
5280                     }
5281                 }
5282                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5283                     int mx= s->b_direct_mv_table[xy][0];
5284                     int my= s->b_direct_mv_table[xy][1];
5285
5286                     backup_s.dquant = 0;
5287                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5288                     s->mb_intra= 0;
5289                     ff_mpeg4_set_direct_mv(s, mx, my);
5290                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5291                                  &dmin, &next_block, mx, my);
5292                 }
5293                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT0){
5294                     backup_s.dquant = 0;
5295                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5296                     s->mb_intra= 0;
5297                     ff_mpeg4_set_direct_mv(s, 0, 0);
5298                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5299                                  &dmin, &next_block, 0, 0);
5300                 }
5301                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
5302                     int coded=0;
5303                     for(i=0; i<6; i++)
5304                         coded |= s->block_last_index[i];
5305                     if(coded){
5306                         int mx,my;
5307                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
5308                         if(best_s.mv_dir & MV_DIRECT){
5309                             mx=my=0; //FIXME find the one we actually used
5310                             ff_mpeg4_set_direct_mv(s, mx, my);
5311                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
5312                             mx= s->mv[1][0][0];
5313                             my= s->mv[1][0][1];
5314                         }else{
5315                             mx= s->mv[0][0][0];
5316                             my= s->mv[0][0][1];
5317                         }
5318
5319                         s->mv_dir= best_s.mv_dir;
5320                         s->mv_type = best_s.mv_type;
5321                         s->mb_intra= 0;
5322 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
5323                         s->mv[0][0][1] = best_s.mv[0][0][1];
5324                         s->mv[1][0][0] = best_s.mv[1][0][0];
5325                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
5326                         backup_s.dquant= 0;
5327                         s->skipdct=1;
5328                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5329                                         &dmin, &next_block, mx, my);
5330                         s->skipdct=0;
5331                     }
5332                 }
5333
5334                 s->current_picture.qscale_table[xy]= best_s.qscale;
5335
5336                 copy_context_after_encode(s, &best_s, -1);
5337
5338                 pb_bits_count= put_bits_count(&s->pb);
5339                 flush_put_bits(&s->pb);
5340                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5341                 s->pb= backup_s.pb;
5342
5343                 if(s->data_partitioning){
5344                     pb2_bits_count= put_bits_count(&s->pb2);
5345                     flush_put_bits(&s->pb2);
5346                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5347                     s->pb2= backup_s.pb2;
5348
5349                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5350                     flush_put_bits(&s->tex_pb);
5351                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5352                     s->tex_pb= backup_s.tex_pb;
5353                 }
5354                 s->last_bits= put_bits_count(&s->pb);
5355
5356                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5357                     ff_h263_update_motion_val(s);
5358
5359                 if(next_block==0){ //FIXME 16 vs linesize16
5360                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5361                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5362                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5363                 }
5364
5365                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5366                     MPV_decode_mb(s, s->block);
5367             } else {
5368                 int motion_x = 0, motion_y = 0;
5369                 s->mv_type=MV_TYPE_16X16;
5370                 // only one MB-Type possible
5371
5372                 switch(mb_type){
5373                 case CANDIDATE_MB_TYPE_INTRA:
5374                     s->mv_dir = 0;
5375                     s->mb_intra= 1;
5376                     motion_x= s->mv[0][0][0] = 0;
5377                     motion_y= s->mv[0][0][1] = 0;
5378                     break;
5379                 case CANDIDATE_MB_TYPE_INTER:
5380                     s->mv_dir = MV_DIR_FORWARD;
5381                     s->mb_intra= 0;
5382                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5383                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5384                     break;
5385                 case CANDIDATE_MB_TYPE_INTER_I:
5386                     s->mv_dir = MV_DIR_FORWARD;
5387                     s->mv_type = MV_TYPE_FIELD;
5388                     s->mb_intra= 0;
5389                     for(i=0; i<2; i++){
5390                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5391                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5392                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5393                     }
5394                     break;
5395                 case CANDIDATE_MB_TYPE_INTER4V:
5396                     s->mv_dir = MV_DIR_FORWARD;
5397                     s->mv_type = MV_TYPE_8X8;
5398                     s->mb_intra= 0;
5399                     for(i=0; i<4; i++){
5400                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5401                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5402                     }
5403                     break;
5404                 case CANDIDATE_MB_TYPE_DIRECT:
5405                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5406                     s->mb_intra= 0;
5407                     motion_x=s->b_direct_mv_table[xy][0];
5408                     motion_y=s->b_direct_mv_table[xy][1];
5409                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5410                     break;
5411                 case CANDIDATE_MB_TYPE_DIRECT0:
5412                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5413                     s->mb_intra= 0;
5414                     ff_mpeg4_set_direct_mv(s, 0, 0);
5415                     break;
5416                 case CANDIDATE_MB_TYPE_BIDIR:
5417                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5418                     s->mb_intra= 0;
5419                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5420                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5421                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5422                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5423                     break;
5424                 case CANDIDATE_MB_TYPE_BACKWARD:
5425                     s->mv_dir = MV_DIR_BACKWARD;
5426                     s->mb_intra= 0;
5427                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5428                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5429                     break;
5430                 case CANDIDATE_MB_TYPE_FORWARD:
5431                     s->mv_dir = MV_DIR_FORWARD;
5432                     s->mb_intra= 0;
5433                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5434                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5435 //                    printf(" %d %d ", motion_x, motion_y);
5436                     break;
5437                 case CANDIDATE_MB_TYPE_FORWARD_I:
5438                     s->mv_dir = MV_DIR_FORWARD;
5439                     s->mv_type = MV_TYPE_FIELD;
5440                     s->mb_intra= 0;
5441                     for(i=0; i<2; i++){
5442                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5443                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5444                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5445                     }
5446                     break;
5447                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5448                     s->mv_dir = MV_DIR_BACKWARD;
5449                     s->mv_type = MV_TYPE_FIELD;
5450                     s->mb_intra= 0;
5451                     for(i=0; i<2; i++){
5452                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5453                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5454                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5455                     }
5456                     break;
5457                 case CANDIDATE_MB_TYPE_BIDIR_I:
5458                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5459                     s->mv_type = MV_TYPE_FIELD;
5460                     s->mb_intra= 0;
5461                     for(dir=0; dir<2; dir++){
5462                         for(i=0; i<2; i++){
5463                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5464                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5465                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5466                         }
5467                     }
5468                     break;
5469                 default:
5470                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5471                 }
5472
5473                 encode_mb(s, motion_x, motion_y);
5474
5475                 // RAL: Update last macroblock type
5476                 s->last_mv_dir = s->mv_dir;
5477
5478                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5479                     ff_h263_update_motion_val(s);
5480
5481                 MPV_decode_mb(s, s->block);
5482             }
5483
5484             /* clean the MV table in IPS frames for direct mode in B frames */
5485             if(s->mb_intra /* && I,P,S_TYPE */){
5486                 s->p_mv_table[xy][0]=0;
5487                 s->p_mv_table[xy][1]=0;
5488             }
5489
5490             if(s->flags&CODEC_FLAG_PSNR){
5491                 int w= 16;
5492                 int h= 16;
5493
5494                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5495                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5496
5497                 s->current_picture.error[0] += sse(
5498                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5499                     s->dest[0], w, h, s->linesize);
5500                 s->current_picture.error[1] += sse(
5501                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5502                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5503                 s->current_picture.error[2] += sse(
5504                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5505                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5506             }
5507             if(s->loop_filter){
5508                 if(s->out_format == FMT_H263)
5509                     ff_h263_loop_filter(s);
5510             }
5511 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5512         }
5513     }
5514
5515     //not beautiful here but we must write it before flushing so it has to be here
5516     if (ENABLE_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5517         msmpeg4_encode_ext_header(s);
5518
5519     write_slice_end(s);
5520
5521     /* Send the last GOB if RTP */
5522     if (s->avctx->rtp_callback) {
5523         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5524         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5525         /* Call the RTP callback to send the last GOB */
5526         emms_c();
5527         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5528     }
5529
5530     return 0;
5531 }
5532
5533 #define MERGE(field) dst->field += src->field; src->field=0
5534 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5535     MERGE(me.scene_change_score);
5536     MERGE(me.mc_mb_var_sum_temp);
5537     MERGE(me.mb_var_sum_temp);
5538 }
5539
/**
 * Merge the encoding statistics and the written bitstream of a slave
 * thread context into the master context after the encode pass.
 *
 * Each MERGE() adds the slave's counter into the master and zeroes the
 * slave's copy. Afterwards the slave's bitstream is appended to the
 * master's; both bitstreams must be byte-aligned at this point (slices
 * are flushed to byte boundaries before merging).
 *
 * @param dst master context that accumulates statistics and bits
 * @param src slave context; its counters are reset and its pb consumed
 */
static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
    int i;

    MERGE(dct_count[0]); //note, the other dct vars are not part of the context
    MERGE(dct_count[1]);
    MERGE(mv_bits);
    MERGE(i_tex_bits);
    MERGE(p_tex_bits);
    MERGE(i_count);
    MERGE(f_count);
    MERGE(b_count);
    MERGE(skip_count);
    MERGE(misc_bits);
    MERGE(error_count);
    MERGE(padding_bug_score);
    MERGE(current_picture.error[0]);
    MERGE(current_picture.error[1]);
    MERGE(current_picture.error[2]);

    if(dst->avctx->noise_reduction){
        // denoise error sums exist only when noise reduction is enabled
        for(i=0; i<64; i++){
            MERGE(dct_error_sum[0][i]);
            MERGE(dct_error_sum[1][i]);
        }
    }

    // both streams must be byte-aligned before a bit-exact concatenation
    assert(put_bits_count(&src->pb) % 8 ==0);
    assert(put_bits_count(&dst->pb) % 8 ==0);
    ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
    flush_put_bits(&dst->pb);
}
5571
5572 static int estimate_qp(MpegEncContext *s, int dry_run){
5573     if (s->next_lambda){
5574         s->current_picture_ptr->quality=
5575         s->current_picture.quality = s->next_lambda;
5576         if(!dry_run) s->next_lambda= 0;
5577     } else if (!s->fixed_qscale) {
5578         s->current_picture_ptr->quality=
5579         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5580         if (s->current_picture.quality < 0)
5581             return -1;
5582     }
5583
5584     if(s->adaptive_quant){
5585         switch(s->codec_id){
5586         case CODEC_ID_MPEG4:
5587             ff_clean_mpeg4_qscales(s);
5588             break;
5589         case CODEC_ID_H263:
5590         case CODEC_ID_H263P:
5591         case CODEC_ID_FLV1:
5592             ff_clean_h263_qscales(s);
5593             break;
5594         }
5595
5596         s->lambda= s->lambda_table[0];
5597         //FIXME broken
5598     }else
5599         s->lambda= s->current_picture.quality;
5600 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5601     update_qscale(s);
5602     return 0;
5603 }
5604
/**
 * Encode one picture: run motion estimation across all worker threads,
 * fix up f/b codes and long MVs, pick the quantizer, write the picture
 * header and then encode all macroblocks via encode_thread().
 *
 * @param s              master encoder context
 * @param picture_number display/coding number of this picture
 * @return 0 on success, -1 if rate control fails
 */
static int encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration

    // rounding control: I-frames reset it, non-B frames toggle it for
    // codecs using flip-flop rounding
    if(s->pict_type==I_TYPE){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=B_TYPE){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    // preliminary quality estimate: 2nd pass reads the stats file,
    // otherwise reuse the lambda of the last picture of the same kind
    if(s->flags & CODEC_FLAG_PASS2){
        if (estimate_qp(s,1) < 0)
            return -1;
        ff_get_2pass_fcode(s);
    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
        if(s->pict_type==B_TYPE)
            s->lambda= s->last_lambda_for[s->pict_type];
        else
            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
        update_qscale(s);
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    // propagate the master state into every slave thread context
    for(i=1; i<s->avctx->thread_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    ff_init_me(s);

    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    }else /* if(s->pict_type == I_TYPE) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
        }
    }
    // collect the per-thread ME statistics back into the master context
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    // scene-change detection: force an I-frame when the ME pass reports a cut
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    // pick f_code/b_code from the MV statistics and clip MVs that the
    // chosen codes cannot represent (not needed with unlimited MVs)
    if(!s->umvplus){
        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
            }

            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==B_TYPE){
            int a, b;

            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    // final quantizer decision (pict_type may have changed above)
    if (estimate_qp(s, 0) < 0)
        return -1;

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
        }
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture_ptr->key_frame=
    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
    s->current_picture_ptr->pict_type=
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    // write the format-specific picture header; header_bits is measured
    // as the put_bits delta around the header write
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        if (ENABLE_MJPEG_ENCODER)
            ff_mjpeg_encode_picture_header(s);
        break;
    case FMT_H261:
        if (ENABLE_H261_ENCODER)
            ff_h261_encode_picture_header(s, picture_number);
        break;
    case FMT_H263:
        if (ENABLE_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (ENABLE_MSMPEG4_ENCODER && s->h263_msmpeg4)
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
        else if (ENABLE_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
            rv10_encode_picture_header(s, picture_number);
        else if (ENABLE_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
            rv20_encode_picture_header(s, picture_number);
        else if (s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    // re-sync slaves with post-ME master state, encode all MBs in
    // parallel, then fold the slaves' statistics and bits back in
    for(i=1; i<s->avctx->thread_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
    return 0;
}
5816
5817 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5818     const int intra= s->mb_intra;
5819     int i;
5820
5821     s->dct_count[intra]++;
5822
5823     for(i=0; i<64; i++){
5824         int level= block[i];
5825
5826         if(level){
5827             if(level>0){
5828                 s->dct_error_sum[intra][i] += level;
5829                 level -= s->dct_offset[intra][i];
5830                 if(level<0) level=0;
5831             }else{
5832                 s->dct_error_sum[intra][i] -= level;
5833                 level += s->dct_offset[intra][i];
5834                 if(level>0) level=0;
5835             }
5836             block[i]= level;
5837         }
5838     }
5839 }
5840
5841 static int dct_quantize_trellis_c(MpegEncContext *s,
5842                         DCTELEM *block, int n,
5843                         int qscale, int *overflow){
5844     const int *qmat;
5845     const uint8_t *scantable= s->intra_scantable.scantable;
5846     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5847     int max=0;
5848     unsigned int threshold1, threshold2;
5849     int bias=0;
5850     int run_tab[65];
5851     int level_tab[65];
5852     int score_tab[65];
5853     int survivor[65];
5854     int survivor_count;
5855     int last_run=0;
5856     int last_level=0;
5857     int last_score= 0;
5858     int last_i;
5859     int coeff[2][64];
5860     int coeff_count[64];
5861     int qmul, qadd, start_i, last_non_zero, i, dc;
5862     const int esc_length= s->ac_esc_length;
5863     uint8_t * length;
5864     uint8_t * last_length;
5865     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5866
5867     s->dsp.fdct (block);
5868
5869     if(s->dct_error_sum)
5870         s->denoise_dct(s, block);
5871     qmul= qscale*16;
5872     qadd= ((qscale-1)|1)*8;
5873
5874     if (s->mb_intra) {
5875         int q;
5876         if (!s->h263_aic) {
5877             if (n < 4)
5878                 q = s->y_dc_scale;
5879             else
5880                 q = s->c_dc_scale;
5881             q = q << 3;
5882         } else{
5883             /* For AIC we skip quant/dequant of INTRADC */
5884             q = 1 << 3;
5885             qadd=0;
5886         }
5887
5888         /* note: block[0] is assumed to be positive */
5889         block[0] = (block[0] + (q >> 1)) / q;
5890         start_i = 1;
5891         last_non_zero = 0;
5892         qmat = s->q_intra_matrix[qscale];
5893         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5894             bias= 1<<(QMAT_SHIFT-1);
5895         length     = s->intra_ac_vlc_length;
5896         last_length= s->intra_ac_vlc_last_length;
5897     } else {
5898         start_i = 0;
5899         last_non_zero = -1;
5900         qmat = s->q_inter_matrix[qscale];
5901         length     = s->inter_ac_vlc_length;
5902         last_length= s->inter_ac_vlc_last_length;
5903     }
5904     last_i= start_i;
5905
5906     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5907     threshold2= (threshold1<<1);
5908
5909     for(i=63; i>=start_i; i--) {
5910         const int j = scantable[i];
5911         int level = block[j] * qmat[j];
5912
5913         if(((unsigned)(level+threshold1))>threshold2){
5914             last_non_zero = i;
5915             break;
5916         }
5917     }
5918
5919     for(i=start_i; i<=last_non_zero; i++) {
5920         const int j = scantable[i];
5921         int level = block[j] * qmat[j];
5922
5923 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5924 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5925         if(((unsigned)(level+threshold1))>threshold2){
5926             if(level>0){
5927                 level= (bias + level)>>QMAT_SHIFT;
5928                 coeff[0][i]= level;
5929                 coeff[1][i]= level-1;
5930 //                coeff[2][k]= level-2;
5931             }else{
5932                 level= (bias - level)>>QMAT_SHIFT;
5933                 coeff[0][i]= -level;
5934                 coeff[1][i]= -level+1;
5935 //                coeff[2][k]= -level+2;
5936             }
5937             coeff_count[i]= FFMIN(level, 2);
5938             assert(coeff_count[i]);
5939             max |=level;
5940         }else{
5941             coeff[0][i]= (level>>31)|1;
5942             coeff_count[i]= 1;
5943         }
5944     }
5945
5946     *overflow= s->max_qcoeff < max; //overflow might have happened
5947
5948     if(last_non_zero < start_i){
5949         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5950         return last_non_zero;
5951     }
5952
5953     score_tab[start_i]= 0;
5954     survivor[0]= start_i;
5955     survivor_count= 1;
5956
5957     for(i=start_i; i<=last_non_zero; i++){
5958         int level_index, j;
5959         const int dct_coeff= FFABS(block[ scantable[i] ]);
5960         const int zero_distoration= dct_coeff*dct_coeff;
5961         int best_score=256*256*256*120;
5962         for(level_index=0; level_index < coeff_count[i]; level_index++){
5963             int distoration;
5964             int level= coeff[level_index][i];
5965             const int alevel= FFABS(level);
5966             int unquant_coeff;
5967
5968             assert(level);
5969
5970             if(s->out_format == FMT_H263){
5971                 unquant_coeff= alevel*qmul + qadd;
5972             }else{ //MPEG1
5973                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5974                 if(s->mb_intra){
5975                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5976                         unquant_coeff =   (unquant_coeff - 1) | 1;
5977                 }else{
5978                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5979                         unquant_coeff =   (unquant_coeff - 1) | 1;
5980                 }
5981                 unquant_coeff<<= 3;
5982             }
5983
5984             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
5985             level+=64;
5986             if((level&(~127)) == 0){
5987                 for(j=survivor_count-1; j>=0; j--){
5988                     int run= i - survivor[j];
5989                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5990                     score += score_tab[i-run];
5991
5992                     if(score < best_score){
5993                         best_score= score;
5994                         run_tab[i+1]= run;
5995                         level_tab[i+1]= level-64;
5996                     }
5997                 }
5998
5999                 if(s->out_format == FMT_H263){
6000                     for(j=survivor_count-1; j>=0; j--){
6001                         int run= i - survivor[j];
6002                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
6003                         score += score_tab[i-run];
6004                         if(score < last_score){
6005                             last_score= score;
6006                             last_run= run;
6007                             last_level= level-64;
6008                             last_i= i+1;
6009                         }
6010                     }
6011                 }
6012             }else{
6013                 distoration += esc_length*lambda;
6014                 for(j=survivor_count-1; j>=0; j--){
6015                     int run= i - survivor[j];
6016                     int score= distoration + score_tab[i-run];
6017
6018                     if(score < best_score){
6019                         best_score= score;
6020                         run_tab[i+1]= run;
6021                         level_tab[i+1]= level-64;
6022                     }
6023                 }
6024
6025                 if(s->out_format == FMT_H263){
6026                   for(j=survivor_count-1; j>=0; j--){
6027                         int run= i - survivor[j];
6028                         int score= distoration + score_tab[i-run];
6029                         if(score < last_score){
6030                             last_score= score;
6031                             last_run= run;
6032                             last_level= level-64;
6033                             last_i= i+1;
6034                         }
6035                     }
6036                 }
6037             }
6038         }
6039
6040         score_tab[i+1]= best_score;
6041
6042         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
6043         if(last_non_zero <= 27){
6044             for(; survivor_count; survivor_count--){
6045                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
6046                     break;
6047             }
6048         }else{
6049             for(; survivor_count; survivor_count--){
6050                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
6051                     break;
6052             }
6053         }
6054
6055         survivor[ survivor_count++ ]= i+1;
6056     }
6057
6058     if(s->out_format != FMT_H263){
6059         last_score= 256*256*256*120;
6060         for(i= survivor[0]; i<=last_non_zero + 1; i++){
6061             int score= score_tab[i];
6062             if(i) score += lambda*2; //FIXME exacter?
6063
6064             if(score < last_score){
6065                 last_score= score;
6066                 last_i= i;
6067                 last_level= level_tab[i];
6068                 last_run= run_tab[i];
6069             }
6070         }
6071     }
6072
6073     s->coded_score[n] = last_score;
6074
6075     dc= FFABS(block[0]);
6076     last_non_zero= last_i - 1;
6077     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
6078
6079     if(last_non_zero < start_i)
6080         return last_non_zero;
6081
6082     if(last_non_zero == 0 && start_i == 0){
6083         int best_level= 0;
6084         int best_score= dc * dc;
6085
6086         for(i=0; i<coeff_count[0]; i++){
6087             int level= coeff[i][0];
6088             int alevel= FFABS(level);
6089             int unquant_coeff, score, distortion;
6090
6091             if(s->out_format == FMT_H263){
6092                     unquant_coeff= (alevel*qmul + qadd)>>3;
6093             }else{ //MPEG1
6094                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
6095                     unquant_coeff =   (unquant_coeff - 1) | 1;
6096             }
6097             unquant_coeff = (unquant_coeff + 4) >> 3;
6098             unquant_coeff<<= 3 + 3;
6099
6100             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
6101             level+=64;
6102             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
6103             else                    score= distortion + esc_length*lambda;
6104
6105             if(score < best_score){
6106                 best_score= score;
6107                 best_level= level - 64;
6108             }
6109         }
6110         block[0]= best_level;
6111         s->coded_score[n] = best_score - dc*dc;
6112         if(best_level == 0) return -1;
6113         else                return last_non_zero;
6114     }
6115
6116     i= last_i;
6117     assert(last_level);
6118
6119     block[ perm_scantable[last_non_zero] ]= last_level;
6120     i -= last_run + 1;
6121
6122     for(; i>start_i; i -= run_tab[i] + 1){
6123         block[ perm_scantable[i-1] ]= level_tab[i];
6124     }
6125
6126     return last_non_zero;
6127 }
6128
//#define REFINE_STATS 1
/* Spatial-domain DCT basis images, indexed [coefficient][pixel].
   Filled lazily by build_basis() on first use (dct_quantize_refine checks
   basis[0][0]==0 as the "not yet built" sentinel). */
static int16_t basis[64][64];
6131
6132 static void build_basis(uint8_t *perm){
6133     int i, j, x, y;
6134     emms_c();
6135     for(i=0; i<8; i++){
6136         for(j=0; j<8; j++){
6137             for(y=0; y<8; y++){
6138                 for(x=0; x<8; x++){
6139                     double s= 0.25*(1<<BASIS_SHIFT);
6140                     int index= 8*i + j;
6141                     int perm_index= perm[index];
6142                     if(i==0) s*= sqrt(0.5);
6143                     if(j==0) s*= sqrt(0.5);
6144                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6145                 }
6146             }
6147         }
6148     }
6149 }
6150
/**
 * Iteratively refine an already-quantized 8x8 block to minimize a combined
 * rate/distortion score (quantizer noise shaping).
 *
 * Starting from the levels present in block[], each pass tries changing one
 * coefficient by +-1, scoring the candidate as
 * (VLC bit-length delta)*lambda + distortion delta measured against the
 * running residual rem[] via s->dsp.try_8x8basis(); the best improvement is
 * applied and the loop repeats until no change helps.
 *
 * @param s      encoder context (quant state, DSP functions, AC VLC length tables)
 * @param block  quantized coefficients, updated in place
 * @param weight per-coefficient noise-shaping weights; remapped in place to 16..63
 * @param orig   reference values the residual is measured against
 *               (presumably spatial-domain reconstruction targets — they are
 *               combined with the spatial basis[] images; confirm against caller)
 * @param n      block index; n<4 selects the luma DC scale, otherwise chroma
 * @param qscale quantizer scale
 * @return index of the (possibly changed) last non-zero coefficient
 */
static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
                        DCTELEM *block, int16_t *weight, DCTELEM *orig,
                        int n, int qscale){
    int16_t rem[64];
    DECLARE_ALIGNED_16(DCTELEM, d1[64]);
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
//    unsigned int threshold1, threshold2;
//    int bias=0;
    int run_tab[65];
    int prev_run=0;
    int prev_level=0;
    int qmul, qadd, start_i, last_non_zero, i, dc;
    uint8_t * length;
    uint8_t * last_length;
    int lambda;
    int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
#ifdef REFINE_STATS
static int count=0;
static int after_last=0;
static int to_zero=0;
static int from_zero=0;
static int raise=0;
static int lower=0;
static int messed_sign=0;
#endif

    /* basis[] tables are built lazily on first use */
    if(basis[0][0] == 0)
        build_basis(s->dsp.idct_permutation);

    qmul= qscale*2;
    qadd= (qscale-1)|1;
    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1;
            qadd=0;
        }
        q <<= RECON_SHIFT-3;
        /* note: block[0] is assumed to be positive */
        dc= block[0]*q;
//        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        qmat = s->q_intra_matrix[qscale];
//        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
//            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        dc= 0;
        start_i = 0;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_non_zero = s->block_last_index[n];

#ifdef REFINE_STATS
{START_TIMER
#endif
    /* Seed the residual: rounded DC contribution minus the reference values,
       in RECON_SHIFT fixed point. */
    dc += (1<<(RECON_SHIFT-1));
    for(i=0; i<64; i++){
        rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
    }
#ifdef REFINE_STATS
STOP_TIMER("memset rem[]")}
#endif
    /* Remap the noise-shaping weights into 16..63 and derive lambda from
       their squared sum and s->lambda2. */
    sum=0;
    for(i=0; i<64; i++){
        int one= 36;
        int qns=4;
        int w;

        w= FFABS(weight[i]) + qns*one;
        w= 15 + (48*qns*one + w/2)/w; // 16 .. 63

        weight[i] = w;
//        w=weight[i] = (63*qns + (w/2)) / w;

        assert(w>0);
        assert(w<(1<<6));
        sum += w*w;
    }
    lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
#ifdef REFINE_STATS
{START_TIMER
#endif
    /* Build the initial run-length table and fold every coded coefficient's
       dequantized value into the residual. */
    run=0;
    rle_index=0;
    for(i=start_i; i<=last_non_zero; i++){
        int j= perm_scantable[i];
        const int level= block[j];
        int coeff;

        if(level){
            if(level<0) coeff= qmul*level - qadd;
            else        coeff= qmul*level + qadd;
            run_tab[rle_index++]=run;
            run=0;

            s->dsp.add_8x8basis(rem, basis[j], coeff);
        }else{
            run++;
        }
    }
#ifdef REFINE_STATS
if(last_non_zero>0){
STOP_TIMER("init rem[]")
}
}

{START_TIMER
#endif
    /* Greedy search: per pass, pick the single best +-1 change; stop when
       nothing beats the "no change" score. */
    for(;;){
        int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
        int best_coeff=0;
        int best_change=0;
        int run2, best_unquant_change=0, analyze_gradient;
#ifdef REFINE_STATS
{START_TIMER
#endif
        analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;

        if(analyze_gradient){
            /* DCT of the weighted residual; later, a zero coefficient is only
               raised to +-1 when its sign opposes this gradient. */
#ifdef REFINE_STATS
{START_TIMER
#endif
            for(i=0; i<64; i++){
                int w= weight[i];

                d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
            }
#ifdef REFINE_STATS
STOP_TIMER("rem*w*w")}
{START_TIMER
#endif
            s->dsp.fdct(d1);
#ifdef REFINE_STATS
STOP_TIMER("dct")}
#endif
        }

        if(start_i){
            /* Intra only: also try nudging the separately-scaled DC level. */
            const int level= block[0];
            int change, old_coeff;

            assert(s->mb_intra);

            old_coeff= q*level;

            for(change=-1; change<=1; change+=2){
                int new_level= level + change;
                int score, new_coeff;

                new_coeff= q*new_level;
                if(new_coeff >= 2048 || new_coeff < 0)
                    continue;

                score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
                if(score<best_score){
                    best_score= score;
                    best_coeff= 0;
                    best_change= change;
                    best_unquant_change= new_coeff - old_coeff;
                }
            }
        }

        run=0;
        rle_index=0;
        run2= run_tab[rle_index++];
        prev_level=0;
        prev_run=0;

        for(i=start_i; i<64; i++){
            int j= perm_scantable[i];
            const int level= block[j];
            int change, old_coeff;

            if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
                break;

            if(level){
                if(level<0) old_coeff= qmul*level - qadd;
                else        old_coeff= qmul*level + qadd;
                run2= run_tab[rle_index++]; //FIXME ! maybe after last
            }else{
                old_coeff=0;
                run2--;
                assert(run2>=0 || i >= last_non_zero );
            }

            /* Try level-1 and level+1; score accumulates the VLC bit-cost
               delta, is scaled by lambda, then gets the distortion delta. */
            for(change=-1; change<=1; change+=2){
                int new_level= level + change;
                int score, new_coeff, unquant_change;

                score=0;
                if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
                   continue;

                if(new_level){
                    if(new_level<0) new_coeff= qmul*new_level - qadd;
                    else            new_coeff= qmul*new_level + qadd;
                    if(new_coeff >= 2048 || new_coeff <= -2048)
                        continue;
                    //FIXME check for overflow

                    if(level){
                        /* nonzero -> nonzero: only this (run,level) VLC length changes */
                        if(level < 63 && level > -63){
                            if(i < last_non_zero)
                                score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
                                         - length[UNI_AC_ENC_INDEX(run, level+64)];
                            else
                                score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
                                         - last_length[UNI_AC_ENC_INDEX(run, level+64)];
                        }
                    }else{
                        /* zero -> +-1: splits the current run (or appends past
                           the previous last coefficient) */
                        assert(FFABS(new_level)==1);

                        if(analyze_gradient){
                            int g= d1[ scantable[i] ];
                            if(g && (g^new_level) >= 0)
                                continue;
                        }

                        if(i < last_non_zero){
                            int next_i= i + run2 + 1;
                            int next_level= block[ perm_scantable[next_i] ] + 64;

                            if(next_level&(~127))
                                next_level= 0;

                            if(next_i < last_non_zero)
                                score +=   length[UNI_AC_ENC_INDEX(run, 65)]
                                         + length[UNI_AC_ENC_INDEX(run2, next_level)]
                                         - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
                            else
                                score +=  length[UNI_AC_ENC_INDEX(run, 65)]
                                        + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
                                        - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
                        }else{
                            score += last_length[UNI_AC_ENC_INDEX(run, 65)];
                            if(prev_level){
                                score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
                                        - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
                            }
                        }
                    }
                }else{
                    /* +-1 -> zero: merges the neighbouring runs (or shortens
                       the block so the previous coefficient becomes last) */
                    new_coeff=0;
                    assert(FFABS(level)==1);

                    if(i < last_non_zero){
                        int next_i= i + run2 + 1;
                        int next_level= block[ perm_scantable[next_i] ] + 64;

                        if(next_level&(~127))
                            next_level= 0;

                        if(next_i < last_non_zero)
                            score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run2, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run, 65)];
                        else
                            score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
                                     - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run, 65)];
                    }else{
                        score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
                        if(prev_level){
                            score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
                                    - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
                        }
                    }
                }

                score *= lambda;

                unquant_change= new_coeff - old_coeff;
                assert((score < 100*lambda && score > -100*lambda) || lambda==0);

                score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
                if(score<best_score){
                    best_score= score;
                    best_coeff= i;
                    best_change= change;
                    best_unquant_change= unquant_change;
                }
            }
            if(level){
                prev_level= level + 64;
                if(prev_level&(~127))
                    prev_level= 0;
                prev_run= run;
                run=0;
            }else{
                run++;
            }
        }
#ifdef REFINE_STATS
STOP_TIMER("iterative step")}
#endif

        if(best_change){
            int j= perm_scantable[ best_coeff ];

            block[j] += best_change;

            if(best_coeff > last_non_zero){
                last_non_zero= best_coeff;
                assert(block[j]);
#ifdef REFINE_STATS
after_last++;
#endif
            }else{
#ifdef REFINE_STATS
if(block[j]){
    if(block[j] - best_change){
        if(FFABS(block[j]) > FFABS(block[j] - best_change)){
            raise++;
        }else{
            lower++;
        }
    }else{
        from_zero++;
    }
}else{
    to_zero++;
}
#endif
                /* the changed coefficient may have become zero: re-find the
                   last non-zero index */
                for(; last_non_zero>=start_i; last_non_zero--){
                    if(block[perm_scantable[last_non_zero]])
                        break;
                }
            }
#ifdef REFINE_STATS
count++;
if(256*256*256*64 % count == 0){
    printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
}
#endif
            /* rebuild the run table and fold the applied change into the
               residual before the next pass */
            run=0;
            rle_index=0;
            for(i=start_i; i<=last_non_zero; i++){
                int j= perm_scantable[i];
                const int level= block[j];

                 if(level){
                     run_tab[rle_index++]=run;
                     run=0;
                 }else{
                     run++;
                 }
            }

            s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
        }else{
            break;
        }
    }
#ifdef REFINE_STATS
if(last_non_zero>0){
STOP_TIMER("iterative search")
}
}
#endif

    return last_non_zero;
}
6526
6527 static int dct_quantize_c(MpegEncContext *s,
6528                         DCTELEM *block, int n,
6529                         int qscale, int *overflow)
6530 {
6531     int i, j, level, last_non_zero, q, start_i;
6532     const int *qmat;
6533     const uint8_t *scantable= s->intra_scantable.scantable;
6534     int bias;
6535     int max=0;
6536     unsigned int threshold1, threshold2;
6537
6538     s->dsp.fdct (block);
6539
6540     if(s->dct_error_sum)
6541         s->denoise_dct(s, block);
6542
6543     if (s->mb_intra) {
6544         if (!s->h263_aic) {
6545             if (n < 4)
6546                 q = s->y_dc_scale;
6547             else
6548                 q = s->c_dc_scale;
6549             q = q << 3;
6550         } else
6551             /* For AIC we skip quant/dequant of INTRADC */
6552             q = 1 << 3;
6553
6554         /* note: block[0] is assumed to be positive */
6555         block[0] = (block[0] + (q >> 1)) / q;
6556         start_i = 1;
6557         last_non_zero = 0;
6558         qmat = s->q_intra_matrix[qscale];
6559         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6560     } else {
6561         start_i = 0;
6562         last_non_zero = -1;
6563         qmat = s->q_inter_matrix[qscale];
6564         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6565     }
6566     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6567     threshold2= (threshold1<<1);
6568     for(i=63;i>=start_i;i--) {
6569         j = scantable[i];
6570         level = block[j] * qmat[j];
6571
6572         if(((unsigned)(level+threshold1))>threshold2){
6573             last_non_zero = i;
6574             break;
6575         }else{
6576             block[j]=0;
6577         }
6578     }
6579     for(i=start_i; i<=last_non_zero; i++) {
6580         j = scantable[i];
6581         level = block[j] * qmat[j];
6582
6583 //        if(   bias+level >= (1<<QMAT_SHIFT)
6584 //           || bias-level >= (1<<QMAT_SHIFT)){
6585         if(((unsigned)(level+threshold1))>threshold2){
6586             if(level>0){
6587                 level= (bias + level)>>QMAT_SHIFT;
6588                 block[j]= level;
6589             }else{
6590                 level= (bias - level)>>QMAT_SHIFT;
6591                 block[j]= -level;
6592             }
6593             max |=level;
6594         }else{
6595             block[j]=0;
6596         }
6597     }
6598     *overflow= s->max_qcoeff < max; //overflow might have happened
6599
6600     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
6601     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6602         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6603
6604     return last_non_zero;
6605 }
6606
6607 #endif //CONFIG_ENCODERS
6608
6609 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6610                                    DCTELEM *block, int n, int qscale)
6611 {
6612     int i, level, nCoeffs;
6613     const uint16_t *quant_matrix;
6614
6615     nCoeffs= s->block_last_index[n];
6616
6617     if (n < 4)
6618         block[0] = block[0] * s->y_dc_scale;
6619     else
6620         block[0] = block[0] * s->c_dc_scale;
6621     /* XXX: only mpeg1 */
6622     quant_matrix = s->intra_matrix;
6623     for(i=1;i<=nCoeffs;i++) {
6624         int j= s->intra_scantable.permutated[i];
6625         level = block[j];
6626         if (level) {
6627             if (level < 0) {
6628                 level = -level;
6629                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6630                 level = (level - 1) | 1;
6631                 level = -level;
6632             } else {
6633                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6634                 level = (level - 1) | 1;
6635             }
6636             block[j] = level;
6637         }
6638     }
6639 }
6640
6641 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6642                                    DCTELEM *block, int n, int qscale)
6643 {
6644     int i, level, nCoeffs;
6645     const uint16_t *quant_matrix;
6646
6647     nCoeffs= s->block_last_index[n];
6648
6649     quant_matrix = s->inter_matrix;
6650     for(i=0; i<=nCoeffs; i++) {
6651         int j= s->intra_scantable.permutated[i];
6652         level = block[j];
6653         if (level) {
6654             if (level < 0) {
6655                 level = -level;
6656                 level = (((level << 1) + 1) * qscale *
6657                          ((int) (quant_matrix[j]))) >> 4;
6658                 level = (level - 1) | 1;
6659                 level = -level;
6660             } else {
6661                 level = (((level << 1) + 1) * qscale *
6662                          ((int) (quant_matrix[j]))) >> 4;
6663                 level = (level - 1) | 1;
6664             }
6665             block[j] = level;
6666         }
6667     }
6668 }
6669
6670 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6671                                    DCTELEM *block, int n, int qscale)
6672 {
6673     int i, level, nCoeffs;
6674     const uint16_t *quant_matrix;
6675
6676     if(s->alternate_scan) nCoeffs= 63;
6677     else nCoeffs= s->block_last_index[n];
6678
6679     if (n < 4)
6680         block[0] = block[0] * s->y_dc_scale;
6681     else
6682         block[0] = block[0] * s->c_dc_scale;
6683     quant_matrix = s->intra_matrix;
6684     for(i=1;i<=nCoeffs;i++) {
6685         int j= s->intra_scantable.permutated[i];
6686         level = block[j];
6687         if (level) {
6688             if (level < 0) {
6689                 level = -level;
6690                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6691                 level = -level;
6692             } else {
6693                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6694             }
6695             block[j] = level;
6696         }
6697     }
6698 }
6699
6700 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6701                                    DCTELEM *block, int n, int qscale)
6702 {
6703     int i, level, nCoeffs;
6704     const uint16_t *quant_matrix;
6705     int sum=-1;
6706
6707     if(s->alternate_scan) nCoeffs= 63;
6708     else nCoeffs= s->block_last_index[n];
6709
6710     if (n < 4)
6711         block[0] = block[0] * s->y_dc_scale;
6712     else
6713         block[0] = block[0] * s->c_dc_scale;
6714     quant_matrix = s->intra_matrix;
6715     for(i=1;i<=nCoeffs;i++) {
6716         int j= s->intra_scantable.permutated[i];
6717         level = block[j];
6718         if (level) {
6719             if (level < 0) {
6720                 level = -level;
6721                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6722                 level = -level;
6723             } else {
6724                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6725             }
6726             block[j] = level;
6727             sum+=level;
6728         }
6729     }
6730     block[63]^=sum&1;
6731 }
6732
6733 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6734                                    DCTELEM *block, int n, int qscale)
6735 {
6736     int i, level, nCoeffs;
6737     const uint16_t *quant_matrix;
6738     int sum=-1;
6739
6740     if(s->alternate_scan) nCoeffs= 63;
6741     else nCoeffs= s->block_last_index[n];
6742
6743     quant_matrix = s->inter_matrix;
6744     for(i=0; i<=nCoeffs; i++) {
6745         int j= s->intra_scantable.permutated[i];
6746         level = block[j];
6747         if (level) {
6748             if (level < 0) {
6749                 level = -level;
6750                 level = (((level << 1) + 1) * qscale *
6751                          ((int) (quant_matrix[j]))) >> 4;
6752                 level = -level;
6753             } else {
6754                 level = (((level << 1) + 1) * qscale *
6755                          ((int) (quant_matrix[j]))) >> 4;
6756             }
6757             block[j] = level;
6758             sum+=level;
6759         }
6760     }
6761     block[63]^=sum&1;
6762 }
6763
6764 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6765                                   DCTELEM *block, int n, int qscale)
6766 {
6767     int i, level, qmul, qadd;
6768     int nCoeffs;
6769
6770     assert(s->block_last_index[n]>=0);
6771
6772     qmul = qscale << 1;
6773
6774     if (!s->h263_aic) {
6775         if (n < 4)
6776             block[0] = block[0] * s->y_dc_scale;
6777         else
6778             block[0] = block[0] * s->c_dc_scale;
6779         qadd = (qscale - 1) | 1;
6780     }else{
6781         qadd = 0;
6782     }
6783     if(s->ac_pred)
6784         nCoeffs=63;
6785     else
6786         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6787
6788     for(i=1; i<=nCoeffs; i++) {
6789         level = block[i];
6790         if (level) {
6791             if (level < 0) {
6792                 level = level * qmul - qadd;
6793             } else {
6794                 level = level * qmul + qadd;
6795             }
6796             block[i] = level;
6797         }
6798     }
6799 }
6800
6801 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6802                                   DCTELEM *block, int n, int qscale)
6803 {
6804     int i, level, qmul, qadd;
6805     int nCoeffs;
6806
6807     assert(s->block_last_index[n]>=0);
6808
6809     qadd = (qscale - 1) | 1;
6810     qmul = qscale << 1;
6811
6812     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6813
6814     for(i=0; i<=nCoeffs; i++) {
6815         level = block[i];
6816         if (level) {
6817             if (level < 0) {
6818                 level = level * qmul - qadd;
6819             } else {
6820                 level = level * qmul + qadd;
6821             }
6822             block[i] = level;
6823         }
6824     }
6825 }
6826
6827 #ifdef CONFIG_ENCODERS
/* H.263 encoder, backed by the shared MPV_encode_* entry points; YUV420P only. */
AVCodec h263_encoder = {
    "h263",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6838
/* H.263+ (H.263v2) encoder, backed by the shared MPV_encode_* entry points. */
AVCodec h263p_encoder = {
    "h263p",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263P,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6849
/* FLV1 (Sorenson H.263 variant) encoder, backed by the shared MPV_encode_* entry points. */
AVCodec flv_encoder = {
    "flv",
    CODEC_TYPE_VIDEO,
    CODEC_ID_FLV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6860
/* RealVideo 1.0 encoder; shared MPV_encode_* entry points,
 * YUV420P input only. */
AVCodec rv10_encoder = {
    "rv10",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV10,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6871
/* RealVideo 2.0 encoder; shared MPV_encode_* entry points,
 * YUV420P input only. */
AVCodec rv20_encoder = {
    "rv20",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV20,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6882
/* MPEG-4 part 2 video encoder; shared MPV_encode_* entry points,
 * YUV420P input only. Unlike the other encoders in this table it sets
 * CODEC_CAP_DELAY, i.e. output packets may lag behind input frames —
 * presumably due to B-frame reordering (see file header); confirm. */
AVCodec mpeg4_encoder = {
    "mpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG4,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
    .capabilities= CODEC_CAP_DELAY,
};
6894
/* Microsoft MPEG-4 version 1 encoder; shared MPV_encode_* entry
 * points, YUV420P input only. */
AVCodec msmpeg4v1_encoder = {
    "msmpeg4v1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6905
/* Microsoft MPEG-4 version 2 encoder; shared MPV_encode_* entry
 * points, YUV420P input only. */
AVCodec msmpeg4v2_encoder = {
    "msmpeg4v2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6916
/* Microsoft MPEG-4 version 3 encoder. Note the user-visible codec name
 * is plain "msmpeg4" (not "msmpeg4v3"). Shared MPV_encode_* entry
 * points, YUV420P input only. */
AVCodec msmpeg4v3_encoder = {
    "msmpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V3,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6927
/* Windows Media Video 7 (WMV1) encoder; shared MPV_encode_* entry
 * points, YUV420P input only. */
AVCodec wmv1_encoder = {
    "wmv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};
6938
6939 #endif //CONFIG_ENCODERS