/*
 * libavcodec/mpegvideo.c
 * (Captured from a git web view — page header read "git.sesse.net Git - ffmpeg/blob -
 * libavcodec/mpegvideo.c"; commit subject: "10l, left debugging on". The per-line
 * numbers embedded in this capture are viewer artifacts, not part of the source.)
 */
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22  
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */ 
27  
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
55                                   DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57 #ifdef CONFIG_ENCODERS
58 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63 #endif //CONFIG_ENCODERS
64
65 #ifdef HAVE_XVMC
66 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67 extern void XVMC_field_end(MpegEncContext *s);
68 extern void XVMC_decode_mb(MpegEncContext *s);
69 #endif
70
71 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72
73
74 /* enable all paranoid tests for rounding, overflows, etc... */
75 //#define PARANOID
76
77 //#define DEBUG
78
79
80 /* for jpeg fast DCT */
81 #define CONST_BITS 14
82
83 static const uint16_t aanscales[64] = {
84     /* precomputed values scaled up by 14 bits */
85     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
86     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
87     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
88     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
89     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
90     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
91     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
92     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
93 };
94
95 static const uint8_t h263_chroma_roundtab[16] = {
96 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
97     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
98 };
99
100 static const uint8_t ff_default_chroma_qscale_table[32]={
101 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
102     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
103 };
104
105 #ifdef CONFIG_ENCODERS
106 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
107 static uint8_t default_fcode_tab[MAX_MV*2+1];
108
109 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
110
/**
 * Builds the per-qscale quantization multiplier tables used by the encoder.
 * Division by (qscale * quant_matrix[j]) is replaced by multiplication with a
 * precomputed fixed-point reciprocal, chosen to match the scaling convention
 * of whichever forward DCT is in use.
 *
 * @param dsp          DSP context; fdct / idct_permutation select the table layout
 * @param qmat         output: 32-bit reciprocals, QMAT_SHIFT fixed point
 * @param qmat16       output: 16-bit reciprocals + bias, for the MMX quantizer
 * @param quant_matrix input quantization matrix (raster order)
 * @param bias         rounding bias in QUANT_BIAS_SHIFT fixed point
 * @param qmin, qmax   inclusive qscale range to fill
 * @param intra        1 to skip the DC coefficient in the overflow scan
 */
static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                           const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
{
    int qscale;
    int shift=0;   // extra right-shift needed if any max*qmat product would overflow int

    for(qscale=qmin; qscale<=qmax; qscale++){
        int i;
        // Case 1: DCT output is unscaled (islow, or faandct with postscale):
        // plain reciprocal, no AAN compensation needed.
        if (dsp->fdct == ff_jpeg_fdct_islow
#ifdef FAAN_POSTSCALE
            || dsp->fdct == ff_faandct
#endif
            ) {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
                                (qscale * quant_matrix[j]));
            }
        // Case 2: AAN-style fdct leaves the per-coefficient scale factors in the
        // output, so fold aanscales[] (14-bit fixed point) into the reciprocal.
        } else if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
                                (aanscales[i] * qscale * quant_matrix[j]));
            }
        // Case 3: other DCTs — also fill the 16-bit tables for the MMX quantizer.
        } else {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                   So 16           <= qscale * quant_matrix[i]             <= 7905
                   so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
                   so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
                */
                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
//                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);

                // clamp: 0 would kill the coefficient, 128*256 doesn't fit the
                // signed 16-bit multiply used by the MMX code
                if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
                qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
            }
        }

        // Overflow scan: find a shift so that max_coeff * qmat never exceeds
        // INT_MAX; starts at `intra` so the intra DC coefficient is excluded.
        for(i=intra; i<64; i++){
            int64_t max= 8191;
            if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
                max= (8191LL*aanscales[i]) >> 14;
            }
            while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
                shift++;
            }
        }
    }
    if(shift){
        av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
    }
}
184
185 static inline void update_qscale(MpegEncContext *s){
186     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
187     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
188     
189     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
190 }
191 #endif //CONFIG_ENCODERS
192
193 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
194     int i;
195     int end;
196     
197     st->scantable= src_scantable;
198
199     for(i=0; i<64; i++){
200         int j;
201         j = src_scantable[i];
202         st->permutated[i] = permutation[j];
203 #ifdef ARCH_POWERPC
204         st->inverse[j] = i;
205 #endif
206     }
207     
208     end=-1;
209     for(i=0; i<64; i++){
210         int j;
211         j = st->permutated[i];
212         if(j>end) end=j;
213         st->raster_end[i]= end;
214     }
215 }
216
217 #ifdef CONFIG_ENCODERS
218 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
219     int i;
220
221     if(matrix){
222         put_bits(pb, 1, 1);
223         for(i=0;i<64;i++) {
224             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
225         }
226     }else
227         put_bits(pb, 1, 0);
228 }
229 #endif //CONFIG_ENCODERS
230
231 /* init common dct for both encoder and decoder */
/* init common dct for both encoder and decoder */
int DCT_common_init(MpegEncContext *s)
{
    /* install the portable C (un)quantizers as defaults ... */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS

    /* ... then let each platform init override them with optimized versions */
#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* remember the (possibly platform-optimized) quantizer before trellis
       may replace the main one */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
289
/* Shallow-copies a Picture (struct assignment, so buffer pointers are shared)
   and marks the destination as a copy so it is not released twice. */
static void copy_picture(Picture *dst, Picture *src){
    *dst = *src;
    dst->type= FF_BUFFER_TYPE_COPY;
}
294
/**
 * Copies the frame-level metadata from src to dst; when me_threshold is set,
 * also copies the per-macroblock motion information (mb_type, motion_val,
 * ref_index) so a pre-analysis pass can be reused by the encoder.
 * Note: only the fields listed here are copied — buffer pointers are not.
 */
static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
    int i;

    dst->pict_type              = src->pict_type;
    dst->quality                = src->quality;
    dst->coded_picture_number   = src->coded_picture_number;
    dst->display_picture_number = src->display_picture_number;
//    dst->reference              = src->reference;
    dst->pts                    = src->pts;
    dst->interlaced_frame       = src->interlaced_frame;
    dst->top_field_first        = src->top_field_first;

    if(s->avctx->me_threshold){
        /* the motion arrays are required for me_threshold; complain loudly if
           the application did not provide them */
        if(!src->motion_val[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
        if(!src->mb_type)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
        if(!src->ref_index[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
        if(src->motion_subsample_log2 != dst->motion_subsample_log2)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
            src->motion_subsample_log2, dst->motion_subsample_log2);
        /* NOTE(review): on the mismatch above the copies below still run with
           the src-derived stride — presumably callers guarantee matching
           layouts; verify against the callers. */

        memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));

        for(i=0; i<2; i++){
            /* stride/height in motion-vector units, derived from the motion
               subsampling factor (+1 column of padding) */
            int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
            int height= ((16*s->mb_height)>>src->motion_subsample_log2);

            if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
                memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
            }
            if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
                memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
            }
        }
    }
}
333
334 /**
335  * allocates a Picture
336  * The pixels are allocated/set by calling get_buffer() if shared=0
337  */
/**
 * Allocates a Picture's side-data tables and, for non-shared pictures, the
 * pixel buffers via avctx->get_buffer(). On any allocation failure the
 * CHECKED_ALLOCZ macros jump to the fail label and -1 is returned; previously
 * allocated tables are left for free_picture() to release.
 *
 * @param shared if nonzero, pic->data[] is externally owned and must already be set
 * @return 0 on success, -1 on failure
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;

    if(shared){
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        /* get_buffer() must fill age, type and data[0]; anything else is a
           broken application-provided callback */
        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* all pictures must share the linesize established by the first one */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* qscale_table==NULL means the side-data tables were never allocated;
       they are reused across get_buffer() calls otherwise */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* H.264: motion vectors at 4x4 granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* other codecs (or debug display): 8x8 granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
418
419 /**
420  * deallocates a picture
421  */
/**
 * Deallocates a picture: releases the pixel buffers through the codec's
 * release_buffer() callback (unless they are application-shared) and frees
 * all side-data tables allocated by alloc_picture(). Safe to call on a
 * partially allocated Picture, since av_freep() tolerates NULL.
 */
static void free_picture(MpegEncContext *s, Picture *pic){
    int i;

    /* pixel buffers: only ours to release if not shared */
    if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    }

    av_freep(&pic->mb_var);
    av_freep(&pic->mc_mb_var);
    av_freep(&pic->mb_mean);
    av_freep(&pic->mbskip_table);
    av_freep(&pic->qscale_table);
    av_freep(&pic->mb_type_base);
    av_freep(&pic->dct_coeff);
    av_freep(&pic->pan_scan);
    pic->mb_type= NULL;                 /* derived pointer into mb_type_base */
    for(i=0; i<2; i++){
        av_freep(&pic->motion_val_base[i]);
        av_freep(&pic->ref_index[i]);
    }

    /* for shared pictures the data pointers belong to the application;
       just forget them */
    if(pic->type == FF_BUFFER_TYPE_SHARED){
        for(i=0; i<4; i++){
            pic->base[i]=
            pic->data[i]= NULL;
        }
        pic->type= 0;
    }
}
451
/**
 * Allocates the per-thread scratch buffers of a (possibly duplicated)
 * MpegEncContext. On failure returns -1 and leaves cleanup to
 * MPV_common_end(); `base` is currently unused here.
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
    /* working pointer starts one "row block" into the allocation so code may
       address slightly before it */
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the rd/b/obmc scratchpads alias the ME scratchpad (never live at once) */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    /* 12 blocks of 64 coefficients, double-buffered */
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
481
482 static void free_duplicate_context(MpegEncContext *s){
483     if(s==NULL) return;
484
485     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
486     av_freep(&s->me.scratchpad);
487     s->rd_scratchpad=   
488     s->b_scratchpad=    
489     s->obmc_scratchpad= NULL;
490     
491     av_freep(&s->dct_error_sum);
492     av_freep(&s->me.map);
493     av_freep(&s->me.score_map);
494     av_freep(&s->blocks);
495     s->block= NULL;
496 }
497
498 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
499 #define COPY(a) bak->a= src->a
500     COPY(allocated_edge_emu_buffer);
501     COPY(edge_emu_buffer);
502     COPY(me.scratchpad);
503     COPY(rd_scratchpad);
504     COPY(b_scratchpad);
505     COPY(obmc_scratchpad);
506     COPY(me.map);
507     COPY(me.score_map);
508     COPY(blocks);
509     COPY(block);
510     COPY(start_mb_y);
511     COPY(end_mb_y);
512     COPY(me.map_generation);
513     COPY(pb);
514     COPY(dct_error_sum);
515     COPY(dct_count[0]);
516     COPY(dct_count[1]);
517 #undef COPY
518 }
519
/**
 * Synchronizes a duplicated (thread) context with the master context:
 * copies the whole struct from src, then restores dst's thread-private
 * fields via backup_duplicate_context() and re-derives the pblocks
 * pointers, which would otherwise point into src's block array.
 */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    for(i=0;i<12;i++){
        dst->pblocks[i] = (short *)(&dst->block[i]);
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
533
534 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
535 #define COPY(a) dst->a= src->a
536     COPY(pict_type);
537     COPY(current_picture);
538     COPY(f_code);
539     COPY(b_code);
540     COPY(qscale);
541     COPY(lambda);
542     COPY(lambda2);
543     COPY(picture_in_gop_number);
544     COPY(gop_picture_number);
545     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
546     COPY(progressive_frame); //FIXME don't set in encode_header
547     COPY(partitioned_frame); //FIXME don't set in encode_header
548 #undef COPY
549 }
550
551 /**
552  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
553  * the changed fields will not depend upon the prior state of the MpegEncContext.
554  */
555 static void MPV_common_defaults(MpegEncContext *s){
556     s->y_dc_scale_table=
557     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
558     s->chroma_qscale_table= ff_default_chroma_qscale_table;
559     s->progressive_frame= 1;
560     s->progressive_sequence= 1;
561     s->picture_structure= PICT_FRAME;
562
563     s->coded_picture_number = 0;
564     s->picture_number = 0;
565     s->input_picture_number = 0;
566
567     s->picture_in_gop_number = 0;
568
569     s->f_code = 1;
570     s->b_code = 1;
571 }
572
573 /**
574  * sets the given MpegEncContext to defaults for decoding.
575  * the changed fields will not depend upon the prior state of the MpegEncContext.
576  */
/* Decoder defaults: currently identical to the common defaults. */
void MPV_decode_defaults(MpegEncContext *s){
    MPV_common_defaults(s);
}
580
581 /**
582  * sets the given MpegEncContext to defaults for encoding.
583  * the changed fields will not depend upon the prior state of the MpegEncContext.
584  */
585
586 #ifdef CONFIG_ENCODERS
static void MPV_encode_defaults(MpegEncContext *s){
    /* one-time lazy init of the shared encoder tables below
       NOTE(review): not thread-safe (unsynchronized static flag), and the
       av_mallocz() result is not checked — presumably encoder init is
       single-threaded here; verify against the callers. */
    static int done=0;

    MPV_common_defaults(s);

    if(!done){
        int i;
        done=1;

        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
        memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));

        /* fcode 1 covers MVs in [-16, 16) */
        for(i=-16; i<16; i++){
            default_fcode_tab[i + MAX_MV]= 1;
        }
    }
    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
}
606 #endif //CONFIG_ENCODERS
607
608 /** 
609  * init common structure for both encoder and decoder.
610  * this assumes that some variables like width/height are already set
611  */
/**
 * Initializes the common MpegEncContext state for both encoder and decoder.
 * Assumes width/height (and for encoding, the codec flags) are already set.
 * All allocations go through CHECKED_ALLOCZ, which jumps to `fail` on error;
 * MPV_common_end() then releases whatever was allocated so far.
 *
 * @return 0 on success, -1 on failure
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;

    /* each thread needs at least one 16-pixel MB row to work on */
    if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    DCT_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* macroblock geometry; strides have one extra column of padding */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_height = (s->height + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    /* block indexes 0-3 are luma 8x8 blocks, 4-5 are chroma */
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    /* mapping from linear MB index to (padded) stride-based xy position */
    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        /* working pointers skip the one-MB padding border of the base tables */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding

        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))

        /* quantization multiplier tables, filled by convert_matrix() */
        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))

        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        /* 1024 is the neutral DC predictor value */
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);

    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread contexts: slot 0 is the master itself, the rest are copies that
       will carve out their own scratch buffers below */
    s->thread_context[0]= s;
    for(i=1; i<s->avctx->thread_count; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    for(i=0; i<s->avctx->thread_count; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        /* distribute MB rows evenly (with rounding) across threads */
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
799
/* free the common structure shared by encoder and decoder;
   counterpart of MPV_common_init() */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* free the internals of every per-thread slice context first ... */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    /* ... then the duplicated contexts themselves; slot 0 aliases s
       (see MPV_common_init), so start at 1 */
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* motion vector tables: the *_base pointers own the memory, the
       plain pointers are offset views into them and are only cleared */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* field (interlaced) MV tables and field-select tables */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }
    
    /* DC/AC prediction and coded-block state (H.263/MPEG4 style codecs) */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);
    
    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* free the contents of each picture before freeing the array itself */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    /* clear pointers that referenced entries of the freed picture array */
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
883
884 #ifdef CONFIG_ENCODERS
885
886 /* init video encoder */
887 int MPV_encode_init(AVCodecContext *avctx)
888 {
889     MpegEncContext *s = avctx->priv_data;
890     int i;
891     int chroma_h_shift, chroma_v_shift;
892     
893     MPV_encode_defaults(s);
894
895     if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
896         av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
897         return -1;
898     }
899
900     if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
901         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){
902             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
903             return -1;
904         }
905     }else{
906         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){
907             av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
908             return -1;
909         }
910     }
911
912     s->bit_rate = avctx->bit_rate;
913     s->width = avctx->width;
914     s->height = avctx->height;
915     if(avctx->gop_size > 600){
916         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
917         avctx->gop_size=600;
918     }
919     s->gop_size = avctx->gop_size;
920     s->avctx = avctx;
921     s->flags= avctx->flags;
922     s->flags2= avctx->flags2;
923     s->max_b_frames= avctx->max_b_frames;
924     s->codec_id= avctx->codec->id;
925     s->luma_elim_threshold  = avctx->luma_elim_threshold;
926     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
927     s->strict_std_compliance= avctx->strict_std_compliance;
928     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
929     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
930     s->mpeg_quant= avctx->mpeg_quant;
931     s->rtp_mode= !!avctx->rtp_payload_size;
932     s->intra_dc_precision= avctx->intra_dc_precision;
933     s->user_specified_pts = AV_NOPTS_VALUE;
934
935     if (s->gop_size <= 1) {
936         s->intra_only = 1;
937         s->gop_size = 12;
938     } else {
939         s->intra_only = 0;
940     }
941
942     s->me_method = avctx->me_method;
943
944     /* Fixed QSCALE */
945     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
946     
947     s->adaptive_quant= (   s->avctx->lumi_masking
948                         || s->avctx->dark_masking
949                         || s->avctx->temporal_cplx_masking 
950                         || s->avctx->spatial_cplx_masking
951                         || s->avctx->p_masking
952                         || s->avctx->border_masking
953                         || (s->flags&CODEC_FLAG_QP_RD))
954                        && !s->fixed_qscale;
955     
956     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
957     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
958     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
959
960     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
961         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
962         return -1;
963     }    
964
965     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
966         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
967     }
968     
969     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
970         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
971         return -1;
972     }
973     
974     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
975         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
976         return -1;
977     }
978         
979     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate 
980        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
981        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
982         
983         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
984     }
985        
986     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
987        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
988         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
989         return -1;
990     }
991         
992     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
993         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
994         return -1;
995     }
996     
997     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
998         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
999         return -1;
1000     }
1001     
1002     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1003         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1004         return -1;
1005     }
1006
1007     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1008         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1009         return -1;
1010     }
1011     
1012     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1013         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1014         return -1;
1015     }
1016
1017     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN)) 
1018        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1019         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1020         return -1;
1021     }
1022         
1023     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1024         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1025         return -1;
1026     }
1027         
1028     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1029         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1030         return -1;
1031     }
1032
1033     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1034         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1035         return -1;
1036     }
1037     
1038     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1039         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1040         return -1;
1041     }
1042     
1043     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4 
1044        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO 
1045        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1046         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1047         return -1;
1048     }
1049     
1050     if(s->avctx->thread_count > 1)
1051         s->rtp_mode= 1;
1052
1053     if(!avctx->time_base.den || !avctx->time_base.num){
1054         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1055         return -1;
1056     }
1057     
1058     i= (INT_MAX/2+128)>>8;
1059     if(avctx->me_threshold >= i){
1060         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1061         return -1;
1062     }
1063     if(avctx->mb_threshold >= i){
1064         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1065         return -1;
1066     }
1067         
1068     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1069         av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass");
1070         return -1;
1071     }
1072
1073     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1074     if(i > 1){
1075         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1076         avctx->time_base.den /= i;
1077         avctx->time_base.num /= i;
1078 //        return -1;
1079     }
1080     
1081     if(s->codec_id==CODEC_ID_MJPEG){
1082         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1083         s->inter_quant_bias= 0;
1084     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1085         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1086         s->inter_quant_bias= 0;
1087     }else{
1088         s->intra_quant_bias=0;
1089         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1090     }
1091     
1092     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1093         s->intra_quant_bias= avctx->intra_quant_bias;
1094     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1095         s->inter_quant_bias= avctx->inter_quant_bias;
1096         
1097     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1098
1099     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1100         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1101         return -1;        
1102     }
1103     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1104
1105     switch(avctx->codec->id) {
1106     case CODEC_ID_MPEG1VIDEO:
1107         s->out_format = FMT_MPEG1;
1108         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1109         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1110         break;
1111     case CODEC_ID_MPEG2VIDEO:
1112         s->out_format = FMT_MPEG1;
1113         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1114         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1115         s->rtp_mode= 1;
1116         break;
1117     case CODEC_ID_LJPEG:
1118     case CODEC_ID_MJPEG:
1119         s->out_format = FMT_MJPEG;
1120         s->intra_only = 1; /* force intra only for jpeg */
1121         s->mjpeg_write_tables = 1; /* write all tables */
1122         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1123         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1124         s->mjpeg_vsample[1] = 1;
1125         s->mjpeg_vsample[2] = 1; 
1126         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1127         s->mjpeg_hsample[1] = 1; 
1128         s->mjpeg_hsample[2] = 1; 
1129         if (mjpeg_init(s) < 0)
1130             return -1;
1131         avctx->delay=0;
1132         s->low_delay=1;
1133         break;
1134     case CODEC_ID_H261:
1135         s->out_format = FMT_H261;
1136         avctx->delay=0;
1137         s->low_delay=1;
1138         break;
1139     case CODEC_ID_H263:
1140         if (h263_get_picture_format(s->width, s->height) == 7) {
1141             av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
1142             return -1;
1143         }
1144         s->out_format = FMT_H263;
1145         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1146         avctx->delay=0;
1147         s->low_delay=1;
1148         break;
1149     case CODEC_ID_H263P:
1150         s->out_format = FMT_H263;
1151         s->h263_plus = 1;
1152         /* Fx */
1153         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1154         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1155         s->modified_quant= s->h263_aic;
1156         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1157         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1158         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1159         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1160         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1161
1162         /* /Fx */
1163         /* These are just to be sure */
1164         avctx->delay=0;
1165         s->low_delay=1;
1166         break;
1167     case CODEC_ID_FLV1:
1168         s->out_format = FMT_H263;
1169         s->h263_flv = 2; /* format = 1; 11-bit codes */
1170         s->unrestricted_mv = 1;
1171         s->rtp_mode=0; /* don't allow GOB */
1172         avctx->delay=0;
1173         s->low_delay=1;
1174         break;
1175     case CODEC_ID_RV10:
1176         s->out_format = FMT_H263;
1177         avctx->delay=0;
1178         s->low_delay=1;
1179         break;
1180     case CODEC_ID_RV20:
1181         s->out_format = FMT_H263;
1182         avctx->delay=0;
1183         s->low_delay=1;
1184         s->modified_quant=1;
1185         s->h263_aic=1;
1186         s->h263_plus=1;
1187         s->loop_filter=1;
1188         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1189         break;
1190     case CODEC_ID_MPEG4:
1191         s->out_format = FMT_H263;
1192         s->h263_pred = 1;
1193         s->unrestricted_mv = 1;
1194         s->low_delay= s->max_b_frames ? 0 : 1;
1195         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1196         break;
1197     case CODEC_ID_MSMPEG4V1:
1198         s->out_format = FMT_H263;
1199         s->h263_msmpeg4 = 1;
1200         s->h263_pred = 1;
1201         s->unrestricted_mv = 1;
1202         s->msmpeg4_version= 1;
1203         avctx->delay=0;
1204         s->low_delay=1;
1205         break;
1206     case CODEC_ID_MSMPEG4V2:
1207         s->out_format = FMT_H263;
1208         s->h263_msmpeg4 = 1;
1209         s->h263_pred = 1;
1210         s->unrestricted_mv = 1;
1211         s->msmpeg4_version= 2;
1212         avctx->delay=0;
1213         s->low_delay=1;
1214         break;
1215     case CODEC_ID_MSMPEG4V3:
1216         s->out_format = FMT_H263;
1217         s->h263_msmpeg4 = 1;
1218         s->h263_pred = 1;
1219         s->unrestricted_mv = 1;
1220         s->msmpeg4_version= 3;
1221         s->flipflop_rounding=1;
1222         avctx->delay=0;
1223         s->low_delay=1;
1224         break;
1225     case CODEC_ID_WMV1:
1226         s->out_format = FMT_H263;
1227         s->h263_msmpeg4 = 1;
1228         s->h263_pred = 1;
1229         s->unrestricted_mv = 1;
1230         s->msmpeg4_version= 4;
1231         s->flipflop_rounding=1;
1232         avctx->delay=0;
1233         s->low_delay=1;
1234         break;
1235     case CODEC_ID_WMV2:
1236         s->out_format = FMT_H263;
1237         s->h263_msmpeg4 = 1;
1238         s->h263_pred = 1;
1239         s->unrestricted_mv = 1;
1240         s->msmpeg4_version= 5;
1241         s->flipflop_rounding=1;
1242         avctx->delay=0;
1243         s->low_delay=1;
1244         break;
1245     default:
1246         return -1;
1247     }
1248     
1249     avctx->has_b_frames= !s->low_delay;
1250
1251     s->encoding = 1;
1252
1253     /* init */
1254     if (MPV_common_init(s) < 0)
1255         return -1;
1256
1257     if(s->modified_quant)
1258         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1259     s->progressive_frame= 
1260     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1261     s->quant_precision=5;
1262     
1263     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1264     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1265     
1266 #ifdef CONFIG_H261_ENCODER
1267     if (s->out_format == FMT_H261)
1268         ff_h261_encode_init(s);
1269 #endif
1270     if (s->out_format == FMT_H263)
1271         h263_encode_init(s);
1272     if(s->msmpeg4_version)
1273         ff_msmpeg4_encode_init(s);
1274     if (s->out_format == FMT_MPEG1)
1275         ff_mpeg1_encode_init(s);
1276
1277     /* init q matrix */
1278     for(i=0;i<64;i++) {
1279         int j= s->dsp.idct_permutation[i];
1280         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1281             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1282             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1283         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1284             s->intra_matrix[j] =
1285             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1286         }else
1287         { /* mpeg1/2 */
1288             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1289             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1290         }
1291         if(s->avctx->intra_matrix)
1292             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1293         if(s->avctx->inter_matrix)
1294             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1295     }
1296
1297     /* precompute matrix */
1298     /* for mjpeg, we do include qscale in the matrix */
1299     if (s->out_format != FMT_MJPEG) {
1300         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
1301                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1302         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16, 
1303                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1304     }
1305
1306     if(ff_rate_control_init(s) < 0)
1307         return -1;
1308     
1309     return 0;
1310 }
1311
1312 int MPV_encode_end(AVCodecContext *avctx)
1313 {
1314     MpegEncContext *s = avctx->priv_data;
1315
1316 #ifdef STATS
1317     print_stats();
1318 #endif
1319
1320     ff_rate_control_uninit(s);
1321
1322     MPV_common_end(s);
1323     if (s->out_format == FMT_MJPEG)
1324         mjpeg_close(s);
1325
1326     av_freep(&avctx->extradata);
1327       
1328     return 0;
1329 }
1330
1331 #endif //CONFIG_ENCODERS
1332
1333 void init_rl(RLTable *rl, int use_static)
1334 {
1335     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1336     uint8_t index_run[MAX_RUN+1];
1337     int last, run, level, start, end, i;
1338
1339     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1340     if(use_static && rl->max_level[0])
1341         return;
1342
1343     /* compute max_level[], max_run[] and index_run[] */
1344     for(last=0;last<2;last++) {
1345         if (last == 0) {
1346             start = 0;
1347             end = rl->last;
1348         } else {
1349             start = rl->last;
1350             end = rl->n;
1351         }
1352
1353         memset(max_level, 0, MAX_RUN + 1);
1354         memset(max_run, 0, MAX_LEVEL + 1);
1355         memset(index_run, rl->n, MAX_RUN + 1);
1356         for(i=start;i<end;i++) {
1357             run = rl->table_run[i];
1358             level = rl->table_level[i];
1359             if (index_run[run] == rl->n)
1360                 index_run[run] = i;
1361             if (level > max_level[run])
1362                 max_level[run] = level;
1363             if (run > max_run[level])
1364                 max_run[level] = run;
1365         }
1366         if(use_static)
1367             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1368         else
1369             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1370         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1371         if(use_static)
1372             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1373         else
1374             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1375         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1376         if(use_static)
1377             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1378         else
1379             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1380         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1381     }
1382 }
1383
/* draw the edges of width 'w' of an image of size width, height:
   replicate the border pixels into the surrounding margin so that
   code reading slightly outside the frame sees valid data */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *ptr, *last_line;
    int i;

    last_line = buf + (height - 1) * wrap;
    for(i=0;i<w;i++) {
        /* top and bottom: copy the first/last row outward */
        memcpy(buf - (i + 1) * wrap, buf, width);
        memcpy(last_line + (i + 1) * wrap, last_line, width);
    }
    /* left and right: smear the first/last pixel of each row */
    ptr = buf;
    for(i=0;i<height;i++) {
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
        ptr += wrap;
    }
    /* corners (the bottom two were mislabeled "top" before) */
    for(i=0;i<w;i++) {
        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
    }
}
1412
1413 int ff_find_unused_picture(MpegEncContext *s, int shared){
1414     int i;
1415     
1416     if(shared){
1417         for(i=0; i<MAX_PICTURE_COUNT; i++){
1418             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1419         }
1420     }else{
1421         for(i=0; i<MAX_PICTURE_COUNT; i++){
1422             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1423         }
1424         for(i=0; i<MAX_PICTURE_COUNT; i++){
1425             if(s->picture[i].data[0]==NULL) return i;
1426         }
1427     }
1428
1429     assert(0);
1430     return -1;
1431 }
1432
1433 static void update_noise_reduction(MpegEncContext *s){
1434     int intra, i;
1435
1436     for(intra=0; intra<2; intra++){
1437         if(s->dct_count[intra] > (1<<16)){
1438             for(i=0; i<64; i++){
1439                 s->dct_error_sum[intra][i] >>=1;
1440             }
1441             s->dct_count[intra] >>= 1;
1442         }
1443         
1444         for(i=0; i<64; i++){
1445             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1446         }
1447     }
1448 }
1449
/**
 * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
 *
 * Releases stale reference frames, (re)allocates the current picture for
 * decoding, rotates last/next/current picture pointers, and selects the
 * dequantizer functions for this frame.
 *
 * @return 0 on success, -1 if the current picture could not be allocated
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                /* a referenced picture that is neither next nor last should
                   not exist anymore at this point */
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
                }
            }
        }
    }
alloc:
    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        /* B frames (except in H.264) and dropable frames are never used as
           references */
        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
                        && !s->dropable ? 3 : 0;

        pic->coded_picture_number= s->coded_picture_number++;
        
        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE) 
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);
  
  /* pointer rotation below does not apply to H.264 (except SVQ3), which
     manages its own reference lists */
  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL, 
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL, 
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/
    
    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
    
    /* non-intra frame without a reference: allocate a picture and retry
       (handles streams that do not start on a keyframe) */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: address only every second line */
    if(s->picture_structure!=PICT_FRAME){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            } 
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }
  }
   
    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init isn't called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }
        
#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
1576
/* generic function for encode/decode called after a frame has been coded/decoded */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    /* pad the picture edges so unrestricted MVs can reference outside the frame;
       skipped for non-reference frames and when the user handles edges (EMU_EDGE) */
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c(); /* leave MMX state so float code after us works */
    
    /* remember the picture type for the next frame's decisions */
    s->last_pict_type    = s->pict_type;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }    
    }
    assert(i<MAX_PICTURE_COUNT);
#endif    

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1626
1627 /**
1628  * draws an line from (ex, ey) -> (sx, sy).
1629  * @param w width of the image
1630  * @param h height of the image
1631  * @param stride stride/linesize of the image
1632  * @param color color of the arrow
1633  */
1634 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1635     int t, x, y, fr, f;
1636     
1637     sx= clip(sx, 0, w-1);
1638     sy= clip(sy, 0, h-1);
1639     ex= clip(ex, 0, w-1);
1640     ey= clip(ey, 0, h-1);
1641     
1642     buf[sy*stride + sx]+= color;
1643     
1644     if(ABS(ex - sx) > ABS(ey - sy)){
1645         if(sx > ex){
1646             t=sx; sx=ex; ex=t;
1647             t=sy; sy=ey; ey=t;
1648         }
1649         buf+= sx + sy*stride;
1650         ex-= sx;
1651         f= ((ey-sy)<<16)/ex;
1652         for(x= 0; x <= ex; x++){
1653             y = (x*f)>>16;
1654             fr= (x*f)&0xFFFF;
1655             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1656             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1657         }
1658     }else{
1659         if(sy > ey){
1660             t=sx; sx=ex; ex=t;
1661             t=sy; sy=ey; ey=t;
1662         }
1663         buf+= sx + sy*stride;
1664         ey-= sy;
1665         if(ey) f= ((ex-sx)<<16)/ey;
1666         else   f= 0;
1667         for(y= 0; y <= ey; y++){
1668             x = (y*f)>>16;
1669             fr= (y*f)&0xFFFF;
1670             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1671             buf[y*stride + x+1]+= (color*         fr )>>16;;
1672         }
1673     }
1674 }
1675
1676 /**
1677  * draws an arrow from (ex, ey) -> (sx, sy).
1678  * @param w width of the image
1679  * @param h height of the image
1680  * @param stride stride/linesize of the image
1681  * @param color color of the arrow
1682  */
1683 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ 
1684     int dx,dy;
1685
1686     sx= clip(sx, -100, w+100);
1687     sy= clip(sy, -100, h+100);
1688     ex= clip(ex, -100, w+100);
1689     ey= clip(ey, -100, h+100);
1690     
1691     dx= ex - sx;
1692     dy= ey - sy;
1693     
1694     if(dx*dx + dy*dy > 3*3){
1695         int rx=  dx + dy;
1696         int ry= -dx + dy;
1697         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1698         
1699         //FIXME subpixel accuracy
1700         rx= ROUNDED_DIV(rx*3<<4, length);
1701         ry= ROUNDED_DIV(ry*3<<4, length);
1702         
1703         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1704         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1705     }
1706     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1707 }
1708
1709 /**
1710  * prints debuging info for the given picture.
1711  */
1712 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1713
1714     if(!pict || !pict->mb_type) return;
1715
1716     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1717         int x,y;
1718         
1719         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1720         switch (pict->pict_type) {
1721             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1722             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1723             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1724             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1725             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1726             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;            
1727         }
1728         for(y=0; y<s->mb_height; y++){
1729             for(x=0; x<s->mb_width; x++){
1730                 if(s->avctx->debug&FF_DEBUG_SKIP){
1731                     int count= s->mbskip_table[x + y*s->mb_stride];
1732                     if(count>9) count=9;
1733                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1734                 }
1735                 if(s->avctx->debug&FF_DEBUG_QP){
1736                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1737                 }
1738                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1739                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1740                     //Type & MV direction
1741                     if(IS_PCM(mb_type))
1742                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1743                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1744                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1745                     else if(IS_INTRA4x4(mb_type))
1746                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1747                     else if(IS_INTRA16x16(mb_type))
1748                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1749                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1750                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1751                     else if(IS_DIRECT(mb_type))
1752                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1753                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1754                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1755                     else if(IS_GMC(mb_type))
1756                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1757                     else if(IS_SKIP(mb_type))
1758                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1759                     else if(!USES_LIST(mb_type, 1))
1760                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1761                     else if(!USES_LIST(mb_type, 0))
1762                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1763                     else{
1764                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1765                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1766                     }
1767                     
1768                     //segmentation
1769                     if(IS_8X8(mb_type))
1770                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1771                     else if(IS_16X8(mb_type))
1772                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1773                     else if(IS_8X16(mb_type))
1774                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1775                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1776                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1777                     else
1778                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1779                     
1780                         
1781                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1782                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1783                     else
1784                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1785                 }
1786 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1787             }
1788             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1789         }
1790     }
1791
1792     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1793         const int shift= 1 + s->quarter_sample;
1794         int mb_y;
1795         uint8_t *ptr;
1796         int i;
1797         int h_chroma_shift, v_chroma_shift;
1798         const int width = s->avctx->width;
1799         const int height= s->avctx->height;
1800         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1801         const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
1802         s->low_delay=0; //needed to see the vectors without trashing the buffers
1803
1804         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1805         for(i=0; i<3; i++){
1806             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1807             pict->data[i]= s->visualization_buffer[i];
1808         }
1809         pict->type= FF_BUFFER_TYPE_COPY;
1810         ptr= pict->data[0];
1811
1812         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1813             int mb_x;
1814             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1815                 const int mb_index= mb_x + mb_y*s->mb_stride;
1816                 if((s->avctx->debug_mv) && pict->motion_val){
1817                   int type;
1818                   for(type=0; type<3; type++){
1819                     int direction = 0;
1820                     switch (type) {
1821                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1822                                 continue;
1823                               direction = 0;
1824                               break;
1825                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1826                                 continue;
1827                               direction = 0;
1828                               break;
1829                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1830                                 continue;
1831                               direction = 1;
1832                               break;
1833                     }
1834                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1835                         continue;
1836
1837                     if(IS_8X8(pict->mb_type[mb_index])){
1838                       int i;
1839                       for(i=0; i<4; i++){
1840                         int sx= mb_x*16 + 4 + 8*(i&1);
1841                         int sy= mb_y*16 + 4 + 8*(i>>1);
1842                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1843                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1844                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1845                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1846                       }
1847                     }else if(IS_16X8(pict->mb_type[mb_index])){
1848                       int i;
1849                       for(i=0; i<2; i++){
1850                         int sx=mb_x*16 + 8;
1851                         int sy=mb_y*16 + 4 + 8*i;
1852                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1853                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1854                         int my=(pict->motion_val[direction][xy][1]>>shift);
1855                         
1856                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1857                             my*=2;
1858                         
1859                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1860                       }
1861                     }else if(IS_8X16(pict->mb_type[mb_index])){
1862                       int i;
1863                       for(i=0; i<2; i++){
1864                         int sx=mb_x*16 + 4 + 8*i;
1865                         int sy=mb_y*16 + 8;
1866                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1867                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1868                         int my=(pict->motion_val[direction][xy][1]>>shift);
1869                         
1870                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1871                             my*=2;
1872                         
1873                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1874                       }
1875                     }else{
1876                       int sx= mb_x*16 + 8;
1877                       int sy= mb_y*16 + 8;
1878                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1879                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1880                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1881                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1882                     }
1883                   }                  
1884                 }
1885                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1886                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1887                     int y;
1888                     for(y=0; y<8; y++){
1889                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1890                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1891                     }
1892                 }
1893                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1894                     int mb_type= pict->mb_type[mb_index];
1895                     uint64_t u,v;
1896                     int y;
1897 #define COLOR(theta, r)\
1898 u= (int)(128 + r*cos(theta*3.141592/180));\
1899 v= (int)(128 + r*sin(theta*3.141592/180));
1900
1901                     
1902                     u=v=128;
1903                     if(IS_PCM(mb_type)){
1904                         COLOR(120,48)
1905                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1906                         COLOR(30,48)
1907                     }else if(IS_INTRA4x4(mb_type)){
1908                         COLOR(90,48)
1909                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1910 //                        COLOR(120,48)
1911                     }else if(IS_DIRECT(mb_type)){
1912                         COLOR(150,48)
1913                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1914                         COLOR(170,48)
1915                     }else if(IS_GMC(mb_type)){
1916                         COLOR(190,48)
1917                     }else if(IS_SKIP(mb_type)){
1918 //                        COLOR(180,48)
1919                     }else if(!USES_LIST(mb_type, 1)){
1920                         COLOR(240,48)
1921                     }else if(!USES_LIST(mb_type, 0)){
1922                         COLOR(0,48)
1923                     }else{
1924                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1925                         COLOR(300,48)
1926                     }
1927
1928                     u*= 0x0101010101010101ULL;
1929                     v*= 0x0101010101010101ULL;
1930                     for(y=0; y<8; y++){
1931                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1932                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1933                     }
1934
1935                     //segmentation
1936                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1937                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1938                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1939                     }
1940                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1941                         for(y=0; y<16; y++)
1942                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1943                     }
1944                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
1945                         int dm= 1 << (mv_sample_log2-2);
1946                         for(i=0; i<4; i++){
1947                             int sx= mb_x*16 + 8*(i&1);
1948                             int sy= mb_y*16 + 8*(i>>1);
1949                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1950                             //FIXME bidir
1951                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
1952                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
1953                                 for(y=0; y<8; y++)
1954                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
1955                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
1956                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
1957                         }
1958                     }
1959                         
1960                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1961                         // hmm
1962                     }
1963                 }
1964                 s->mbskip_table[mb_index]=0;
1965             }
1966         }
1967     }
1968 }
1969
1970 #ifdef CONFIG_ENCODERS
1971
/**
 * Sum of absolute errors of a 16x16 block against a constant reference value.
 * @param ref the constant to compare every pixel against
 * @param stride linesize of src
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int i, j;
    int sum=0;

    for(j=0; j<16; j++){
        for(i=0; i<16; i++){
            int d= src[i+j*stride] - ref;
            sum+= d >= 0 ? d : -d;
        }
    }

    return sum;
}
1984
1985 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1986     int x, y, w, h;
1987     int acc=0;
1988     
1989     w= s->width &~15;
1990     h= s->height&~15;
1991     
1992     for(y=0; y<h; y+=16){
1993         for(x=0; x<w; x+=16){
1994             int offset= x + y*stride;
1995             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1996             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1997             int sae = get_sae(src + offset, mean, stride);
1998             
1999             acc+= sae + 500 < sad;
2000         }
2001     }
2002     return acc;
2003 }
2004
2005
/**
 * Takes ownership of (or copies) the user-supplied input frame and queues it
 * into s->input_picture[] at position encoding_delay.
 * A NULL pic_arg still shifts the queue (flush at end of stream).
 * @return 0 on success, -1 on invalid (non-monotonic) timestamps
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1; /* whether we can reference the caller's buffer directly */
    
    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        /* validate or synthesize the timestamp; pts must strictly increase */
        if(pts != AV_NOPTS_VALUE){ 
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;
            
                if(time <= last){            
                    /* NOTE(review): "%Ld" is a glibc extension, not portable C
                       ("%lld"/PRId64 would be) — verify toolchain assumptions */
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%Ld, last=%Ld\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                /* caller stopped supplying pts: continue from the last one */
                s->user_specified_pts= 
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%Ld)\n", pts);
            }else{
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    /* direct use is only possible when the caller preserves the buffer and
       the strides match ours exactly */
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
  
//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
    
    if(direct){
        /* share the caller's planes; no copy */
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;
    
        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        int offset= 16; /* our internal buffers have a 16 pixel left border */
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        if(   pic->data[0] + offset == pic_arg->data[0] 
           && pic->data[1] + offset == pic_arg->data[1]
           && pic->data[2] + offset == pic_arg->data[2]){
       // empty -- data is already in place
        }else{
            /* copy the three planes, row by row if strides differ */
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        
            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i] + offset;
            
                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifiying pic_arg
  }
  
    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];
        
    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
2109
2110 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2111     int x, y, plane;
2112     int score=0;
2113     int64_t score64=0;
2114
2115     for(plane=0; plane<3; plane++){
2116         const int stride= p->linesize[plane];
2117         const int bw= plane ? 1 : 2;
2118         for(y=0; y<s->mb_height*bw; y++){
2119             for(x=0; x<s->mb_width*bw; x++){
2120                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2121                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2122                 
2123                 switch(s->avctx->frame_skip_exp){
2124                     case 0: score= FFMAX(score, v); break;
2125                     case 1: score+= ABS(v);break;
2126                     case 2: score+= v*v;break;
2127                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2128                     case 4: score64+= v*v*(int64_t)(v*v);break;
2129                 }
2130             }
2131         }
2132     }
2133     
2134     if(score) score64= score;
2135
2136     if(score64 < s->avctx->frame_skip_threshold)
2137         return 1;
2138     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2139         return 1;
2140     return 0;
2141 }
2142
/**
 * Picks the next picture to encode from s->input_picture[], decides its type
 * (I/P/B), handles frame skipping and B-frame reordering, and sets up
 * s->new_picture / s->current_picture for the encoder.
 */
static void select_input_picture(MpegEncContext *s){
    int i;

    /* shift the reorder queue */
    for(i=1; i<MAX_PICTURE_COUNT; i++)
        s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
    s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;

    /* set next picture type & ordering */
    if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
            /* no reference yet (or intra-only codec): must code an I frame */
            s->reordered_input_picture[0]= s->input_picture[0];
            s->reordered_input_picture[0]->pict_type= I_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
        }else{
            int b_frames;

            /* frame skipping: drop the input frame entirely if it is close
               enough to the last reference */
            if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
                if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
                //FIXME check that te gop check above is +-1 correct
//av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
                
                    if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
                        for(i=0; i<4; i++)
                            s->input_picture[0]->data[i]= NULL;
                        s->input_picture[0]->type= 0;            
                    }else{
                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER 
                               || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
            
                        s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
                    }
                    
                    emms_c();
                    ff_vbv_update(s, 0); /* a skipped frame still consumes VBV time */

                    goto no_output_pic;
                }
            }

            /* 2nd pass: take the picture types recorded in the rate-control log */
            if(s->flags&CODEC_FLAG_PASS2){
                for(i=0; i<s->max_b_frames+1; i++){
                    int pict_num= s->input_picture[0]->display_picture_number + i;

                    if(pict_num >= s->rc_context.num_entries) 
                        break;
                    if(!s->input_picture[i]){
                        s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
                        break;
                    }

                    s->input_picture[i]->pict_type= 
                        s->rc_context.entry[pict_num].new_pict_type;
                }
            }

            /* decide how many B frames precede the next reference frame */
            if(s->avctx->b_frame_strategy==0){
                b_frames= s->max_b_frames;
                while(b_frames && !s->input_picture[b_frames]) b_frames--;
            }else if(s->avctx->b_frame_strategy==1){
                /* adaptive: score each candidate by its intra-block count
                   against the previous frame */
                for(i=1; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
                        s->input_picture[i]->b_frame_score= 
                            get_intra_count(s, s->input_picture[i  ]->data[0], 
                                               s->input_picture[i-1]->data[0], s->linesize) + 1;
                    }
                }
                for(i=0; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
                }
                                
                b_frames= FFMAX(0, i-1);
                
                /* reset scores */
                for(i=0; i<b_frames+1; i++){
                    s->input_picture[i]->b_frame_score=0;
                }
            }else{
                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
                b_frames=0;
            }

            emms_c();
//static int b_count=0;
//b_count+= b_frames;
//av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);

            /* a forced non-B type inside the run ends the B sequence there */
            for(i= b_frames - 1; i>=0; i--){
                int type= s->input_picture[i]->pict_type;
                if(type && type != B_TYPE)
                    b_frames= i;
            }
            if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
                av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
            }

            /* GOP boundary handling: force an I frame (or shorten the B run
               for strict/closed GOPs) */
            if(s->picture_in_gop_number + b_frames >= s->gop_size){
              if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
                    b_frames= s->gop_size - s->picture_in_gop_number - 1;
              }else{
                if(s->flags & CODEC_FLAG_CLOSED_GOP)
                    b_frames=0;
                s->input_picture[b_frames]->pict_type= I_TYPE;
              }
            }
            
            if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
               && b_frames
               && s->input_picture[b_frames]->pict_type== I_TYPE)
                b_frames--;

            /* emit: reference frame first, then its preceding B frames */
            s->reordered_input_picture[0]= s->input_picture[b_frames];
            if(s->reordered_input_picture[0]->pict_type != I_TYPE)
                s->reordered_input_picture[0]->pict_type= P_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
            for(i=0; i<b_frames; i++){
                s->reordered_input_picture[i+1]= s->input_picture[i];
                s->reordered_input_picture[i+1]->pict_type= B_TYPE;
                s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
            }
        }
    }
no_output_pic:
    if(s->reordered_input_picture[0]){
        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;

        copy_picture(&s->new_picture, s->reordered_input_picture[0]);

        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
            // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
        
            int i= ff_find_unused_picture(s, 0);
            Picture *pic= &s->picture[i];

            /* mark us unused / free shared pic */
            for(i=0; i<4; i++)
                s->reordered_input_picture[0]->data[i]= NULL;
            s->reordered_input_picture[0]->type= 0;
            
            pic->reference              = s->reordered_input_picture[0]->reference;
            
            alloc_picture(s, pic, 0);

            copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);

            s->current_picture_ptr= pic;
        }else{
            // input is not a shared pix -> reuse buffer for current_pix

            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
                   || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
            
            s->current_picture_ptr= s->reordered_input_picture[0];
            for(i=0; i<4; i++){
                /* skip the 16 pixel left border of the internal buffer */
                s->new_picture.data[i]+=16;
            }
        }
        copy_picture(&s->current_picture, s->current_picture_ptr);
    
        s->picture_number= s->new_picture.display_picture_number;
//printf("dpn:%d\n", s->picture_number);
    }else{
       memset(&s->new_picture, 0, sizeof(Picture));
    }
}
2307
/**
 * Encodes one video frame.
 * Loads/reorders the input picture, runs the actual encoder on the selected
 * picture, then applies VBV bit stuffing and (for CBR MPEG-1/2) patches the
 * vbv_delay field in the already-written header.
 *
 * @param avctx codec context (statistics fields are updated as a side effect)
 * @param buf output buffer for the coded frame
 * @param buf_size size of buf in bytes
 * @param data input AVFrame (may lead to no output while B-frame reordering fills up)
 * @return number of bytes written to buf (0 if no frame was output), -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    /* this encoder only handles 4:2:0 input */
    if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
        av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
        return -1;
    }
    
    /* split the output buffer between the slice threads, proportionally to
       the number of macroblock rows each thread encodes */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;
    
    /* pick the picture to code now (handles B-frame reordering); the result,
       if any, ends up in s->new_picture */
    select_input_picture(s);
    
    /* output? */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);

        encode_picture(s, s->picture_number);
        
        /* export per-frame statistics to the codec context */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (s->out_format == FMT_MJPEG)
            mjpeg_picture_trailer(s);
        
        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        for(i=0; i<4; i++){
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        /* in pass-1 mode all bits must be accounted for in one of the
           per-category counters */
        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* VBV model update; a non-zero result means stuffing bytes are needed
           to avoid buffer underflow on the decoder side */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                /* MPEG-1/2: plain zero stuffing bytes */
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4: stuffing via a filler startcode (0x1C3) followed by
                   0xFF bytes; the 4 startcode bytes count towards the total */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */    
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);
            
            /* delay in 90kHz units; patched into the 16-bit vbv_delay field
               (spread over 3 bytes) that was written earlier in the header */
            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* no picture to output yet (delay while B-frame queue fills) */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    /* output must be byte aligned */
    assert((s->frame_bits&7)==0);
    
    return s->frame_bits/8;
}
2428
2429 #endif //CONFIG_ENCODERS
2430
/**
 * Global motion compensation with a single motion vector (MPEG-4 GMC with
 * one warp point). Predicts one 16x16 luma block and the two corresponding
 * 8x8 chroma blocks from the reference picture.
 *
 * @param s codec context (sprite offsets/accuracy, edge positions, dsp fns)
 * @param dest_y  destination luma block
 * @param dest_cb destination Cb block (skipped when CODEC_FLAG_GRAY is set)
 * @param dest_cr destination Cr block (skipped when CODEC_FLAG_GRAY is set)
 * @param ref_picture reference planes [Y, Cb, Cr]
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0;

    /* --- luma --- */
    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    /* integer part of the source position; sprite offsets are in units of
       1/(2<<accuracy) pel */
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    /* rescale so the fractional part is in 1/16 pel for dsp.gmc1 */
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    /* clip against the picture; at the far edge the fraction is dropped */
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;
    src_y = clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;
    
    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* 17x17 source needed for the bilinear filter; replicate edges if the
           block reaches outside the padded picture */
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }
    
    if((motion_x|motion_y)&7){
        /* true sub-pel position: use the gmc1 bilinear filter (two 8x16 halves) */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        int dxy;
        
        /* fraction is a multiple of 1/2 pel: plain (no-)rounding hpel copy */
        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }
    
    if(s->flags&CODEC_FLAG_GRAY) return;

    /* --- chroma (half resolution, same steps as luma) --- */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            /* remember so Cr (same position) takes the same emulation path */
            emu=1;
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
    
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
    
    return;
}
2516
2517 static inline void gmc_motion(MpegEncContext *s,
2518                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2519                                uint8_t **ref_picture)
2520 {
2521     uint8_t *ptr;
2522     int linesize, uvlinesize;
2523     const int a= s->sprite_warping_accuracy;
2524     int ox, oy;
2525
2526     linesize = s->linesize;
2527     uvlinesize = s->uvlinesize;
2528
2529     ptr = ref_picture[0];
2530
2531     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2532     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2533
2534     s->dsp.gmc(dest_y, ptr, linesize, 16,
2535            ox, 
2536            oy, 
2537            s->sprite_delta[0][0], s->sprite_delta[0][1],
2538            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2539            a+1, (1<<(2*a+1)) - s->no_rounding,
2540            s->h_edge_pos, s->v_edge_pos);
2541     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2542            ox + s->sprite_delta[0][0]*8, 
2543            oy + s->sprite_delta[1][0]*8, 
2544            s->sprite_delta[0][0], s->sprite_delta[0][1],
2545            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2546            a+1, (1<<(2*a+1)) - s->no_rounding,
2547            s->h_edge_pos, s->v_edge_pos);
2548
2549     if(s->flags&CODEC_FLAG_GRAY) return;
2550
2551     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2552     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2553
2554     ptr = ref_picture[1];
2555     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2556            ox, 
2557            oy, 
2558            s->sprite_delta[0][0], s->sprite_delta[0][1],
2559            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2560            a+1, (1<<(2*a+1)) - s->no_rounding,
2561            s->h_edge_pos>>1, s->v_edge_pos>>1);
2562     
2563     ptr = ref_picture[2];
2564     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2565            ox, 
2566            oy, 
2567            s->sprite_delta[0][0], s->sprite_delta[0][1],
2568            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2569            a+1, (1<<(2*a+1)) - s->no_rounding,
2570            s->h_edge_pos>>1, s->v_edge_pos>>1);
2571 }
2572
2573 /**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2575  * @param buf destination buffer
2576  * @param src source buffer
2577  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2578  * @param block_w width of block
2579  * @param block_h height of block
2580  * @param src_x x coordinate of the top left sample of the block in the source buffer
2581  * @param src_y y coordinate of the top left sample of the block in the source buffer
2582  * @param w width of the source buffer
2583  * @param h height of the source buffer
2584  */
/**
 * Copies a rectangular block into buf, replicating the picture's border
 * samples for the parts of the block that lie outside the picture.
 * src must already point at position (src_x, src_y) of the source plane.
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int top, left, bottom, right;

    /* clamp the nominal block position so that at least one sample row and
       column overlaps the picture, adjusting src accordingly */
    if(src_y >= h){
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    }else if(src_y <= -block_h){
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if(src_x >= w){
        src  += w - 1 - src_x;
        src_x = w - 1;
    }else if(src_x <= -block_w){
        src  += 1 - block_w - src_x;
        src_x = 1 - block_w;
    }

    /* portion of the block that lies inside the picture, in block coords */
    left   = -src_x > 0 ? -src_x : 0;
    top    = -src_y > 0 ? -src_y : 0;
    right  = w - src_x < block_w ? w - src_x : block_w;
    bottom = h - src_y < block_h ? h - src_y : block_h;

    /* copy the part that exists in the source */
    for(y = top; y < bottom; y++){
        for(x = left; x < right; x++){
            buf[x + y*linesize] = src[x + y*linesize];
        }
    }

    /* replicate the first valid row upwards */
    for(y = 0; y < top; y++){
        for(x = left; x < right; x++){
            buf[x + y*linesize] = buf[x + top*linesize];
        }
    }

    /* replicate the last valid row downwards */
    for(y = bottom; y < block_h; y++){
        for(x = left; x < right; x++){
            buf[x + y*linesize] = buf[x + (bottom - 1)*linesize];
        }
    }

    /* replicate the leftmost/rightmost valid columns sideways */
    for(y = 0; y < block_h; y++){
        for(x = 0; x < left; x++){
            buf[x + y*linesize] = buf[left + y*linesize];
        }
        for(x = right; x < block_w; x++){
            buf[x + y*linesize] = buf[right - 1 + y*linesize];
        }
    }
}
2643
2644 static inline int hpel_motion(MpegEncContext *s, 
2645                                   uint8_t *dest, uint8_t *src,
2646                                   int field_based, int field_select,
2647                                   int src_x, int src_y,
2648                                   int width, int height, int stride,
2649                                   int h_edge_pos, int v_edge_pos,
2650                                   int w, int h, op_pixels_func *pix_op,
2651                                   int motion_x, int motion_y)
2652 {
2653     int dxy;
2654     int emu=0;
2655
2656     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2657     src_x += motion_x >> 1;
2658     src_y += motion_y >> 1;
2659                 
2660     /* WARNING: do no forget half pels */
2661     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2662     if (src_x == width)
2663         dxy &= ~1;
2664     src_y = clip(src_y, -16, height);
2665     if (src_y == height)
2666         dxy &= ~2;
2667     src += src_y * stride + src_x;
2668
2669     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2670         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2671            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2672             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2673                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2674             src= s->edge_emu_buffer;
2675             emu=1;
2676         }
2677     }
2678     if(field_select)
2679         src += s->linesize;
2680     pix_op[dxy](dest, src, stride, h);
2681     return emu;
2682 }
2683
2684 static inline int hpel_motion_lowres(MpegEncContext *s, 
2685                                   uint8_t *dest, uint8_t *src,
2686                                   int field_based, int field_select,
2687                                   int src_x, int src_y,
2688                                   int width, int height, int stride,
2689                                   int h_edge_pos, int v_edge_pos,
2690                                   int w, int h, h264_chroma_mc_func *pix_op,
2691                                   int motion_x, int motion_y)
2692 {
2693     const int lowres= s->avctx->lowres;
2694     const int s_mask= (2<<lowres)-1;
2695     int emu=0;
2696     int sx, sy;
2697
2698     if(s->quarter_sample){
2699         motion_x/=2;
2700         motion_y/=2;
2701     }
2702
2703     sx= motion_x & s_mask;
2704     sy= motion_y & s_mask;
2705     src_x += motion_x >> (lowres+1);
2706     src_y += motion_y >> (lowres+1);
2707                 
2708     src += src_y * stride + src_x;
2709
2710     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2711        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2712         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2713                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2714         src= s->edge_emu_buffer;
2715         emu=1;
2716     }
2717
2718     sx <<= 2 - lowres;
2719     sy <<= 2 - lowres;
2720     if(field_select)
2721         src += s->linesize;
2722     pix_op[lowres](dest, src, stride, h, sx, sy);
2723     return emu;
2724 }
2725
2726 /* apply one mpeg motion vector to the three components */
/**
 * Applies one half-pel motion vector to all three components of a macroblock.
 * Handles frame and field prediction and derives the chroma vector according
 * to the output format / chroma subsampling.
 *
 * @param field_based 1 for field prediction (halves the vertical geometry)
 * @param bottom_field write into the bottom field of the destination
 * @param field_select read from the second field of the reference
 * @param pix_op [chroma_x_shift][dxy] table of put/avg functions
 * @param h block height in luma rows
 */
static always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
    
#if 0    
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* luma: half-pel fraction and integer source position */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    /* chroma vector derivation depends on the output format */
    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* buggy-encoder workaround: round the chroma x component up */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            /* H.263: chroma fraction derived directly from the luma vector */
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* 4:2:0: chroma vector is half the luma vector in both axes */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            /* MPEG-1/2 forbids vectors outside the picture: bail out instead
               of emulating the edge */
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);
    
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
    if(s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
#endif
}
2842
2843 /* apply one mpeg motion vector to the three components */
/**
 * Applies one motion vector to all three components of a macroblock in
 * lowres decoding mode (picture downscaled by 2^lowres). Counterpart of
 * mpeg_motion() using the h264-style chroma MC functions.
 *
 * @param field_based 1 for field prediction
 * @param bottom_field write into the bottom field of the destination
 * @param field_select read from the second field of the reference
 * @param pix_op chroma MC function table indexed by lowres level
 * @param h block height in (full-resolution) luma rows
 */
static always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;
    const int s_mask= (2<<lowres)-1;
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }
    
    if(field_based){
        /* compensate for the field offset lost by the downscale */
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split the luma vector into sub-pel fraction and integer position */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
    
    /* chroma vector derivation per output format (see mpeg_motion) */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* scale fractions up to the fixed range expected by the MC functions */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
    
    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
2939
2940 //FIXME move to dsputil, avg variant, 16x16 version
//FIXME move to dsputil, avg variant, 16x16 version
/* Blends the five 8x8 predictions (mid + the ones obtained with the top,
   left, right and bottom neighbours' vectors) into dst using the fixed
   OBMC weight window; each output sample is a weighted average with the
   weights summing to 8 (hence the +4 >> 3). */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
/* one output sample: t/l/m/r/b are the weights for the five predictions */
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
/* same weights applied to a 2x2 group of samples */
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);
    
    /* rows 0-1: top neighbour contributes */
    x=0;
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    x+= stride;
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    x+= stride;
    /* rows 2-3 */
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    x+= 2*stride;
    /* rows 4-5: bottom neighbour starts contributing */
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    x+= 2*stride;
    /* rows 6-7 */
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    x+= stride;
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
}
2991
2992 /* obmc for 1 8x8 luma block */
2993 static inline void obmc_motion(MpegEncContext *s,
2994                                uint8_t *dest, uint8_t *src,
2995                                int src_x, int src_y,
2996                                op_pixels_func *pix_op,
2997                                int16_t mv[5][2]/* mid top left right bottom*/)
2998 #define MID    0
2999 {
3000     int i;
3001     uint8_t *ptr[5];
3002     
3003     assert(s->quarter_sample==0);
3004     
3005     for(i=0; i<5; i++){
3006         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3007             ptr[i]= ptr[MID];
3008         }else{
3009             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3010             hpel_motion(s, ptr[i], src, 0, 0,
3011                         src_x, src_y,
3012                         s->width, s->height, s->linesize,
3013                         s->h_edge_pos, s->v_edge_pos,
3014                         8, 8, pix_op,
3015                         mv[i][0], mv[i][1]);
3016         }
3017     }
3018
3019     put_obmc(dest, ptr, s->linesize);                
3020 }
3021
/**
 * Quarter-pel luma motion compensation of one 16xh macroblock area,
 * with the derived half-pel chroma prediction.
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param field_based 1 if operating on individual fields (halves vertical scale, doubles strides)
 * @param bottom_field 1 to write into the bottom field lines of the destination
 * @param field_select 1 to read from the bottom field lines of the reference
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op half-pel MC functions, used here for the chroma planes
 * @param qpix_op quarter-pel MC functions, used for the luma plane
 * @param motion_x luma MV x component in quarter-pel units
 * @param motion_y luma MV y component in quarter-pel units
 * @param h height of the luma area to predict
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    /* dxy = quarter-pel fractional part of the MV, packed as (frac_y<<2)|frac_x,
       selecting one of the 16 qpel interpolation functions */
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    /* field pictures: half the vertical extent, doubled line strides */
    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;
    
    /* derive the chroma MV (half of luma), with optional emulation of
       buggy encoders' chroma rounding */
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    /* round the now half-pel chroma MV down to full-pel + half-pel flag */
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the source area sticks out of the decoded picture, copy it into
       the edge emulation buffer with replicated borders and read from there */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16 
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based, 
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based, 
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based, 
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        /* no 16x8 qpel functions exist, so do the field as two 8-wide halves */
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3108
3109 inline int ff_h263_round_chroma(int x){
3110     if (x >= 0)
3111         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3112     else {
3113         x = -x;
3114         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3115     }
3116 }
3117
/**
 * h263 chroma 4mv motion compensation.
 * Builds a single chroma MV from the sum of the four luma MVs (callers pass
 * the accumulated sums in mx/my) and predicts both chroma planes with it.
 * @param s context
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op half-pel motion compensation functions (average or put)
 * @param mx sum of the 4 luma MV x components
 * @param my sum of the 4 luma MV y components
 */
static inline void chroma_4mv_motion(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     op_pixels_func *pix_op,
                                     int mx, int my){
    int dxy, emu=0, src_x, src_y, offset;
    uint8_t *ptr;
    
    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);
    
    /* dxy = half-pel flags packed as (frac_y<<1)|frac_x */
    dxy = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    /* clip to picture and drop the half-pel flag at the right/bottom border
       so no pixel beyond the edge is read */
    src_x = clip(src_x, -8, s->width/2);
    if (src_x == s->width/2)
        dxy &= ~1;
    src_y = clip(src_y, -8, s->height/2);
    if (src_y == s->height/2)
        dxy &= ~2;
    
    offset = (src_y * (s->uvlinesize)) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* source sticks out of the picture -> read via the edge emulation buffer */
        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    ptr = ref_picture[2] + offset;
    if(emu){
        /* cr uses the same offset, so it needs emulation iff cb did */
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}
3166
/**
 * Lowres variant of chroma_4mv_motion: predicts both chroma planes of a 4MV
 * macroblock at reduced resolution (block size 8>>lowres) using the h264
 * chroma MC functions, which take explicit sub-pel fractions.
 * @param s context
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op chroma motion compensation functions, indexed by lowres level
 * @param mx sum of the 4 luma MV x components
 * @param my sum of the 4 luma MV y components
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;
    const int s_mask= (2<<lowres)-1;    // mask for the sub-pel bits at this lowres level
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;
    
    /* quarter-sample MVs are first reduced to half-sample precision */
    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);
    
    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));
    
    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* source sticks out of the picture -> read via the edge emulation buffer */
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }     
    /* rescale the sub-pel fraction to the 1/8-pel units the MC functions expect */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
          
    ptr = ref_picture[2] + offset;
    if(emu){
        /* cr uses the same offset, so it needs emulation iff cb did */
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
3216
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion(MpegEncContext *s, 
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture, 
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    /* OBMC (overlapped block motion compensation): each 8x8 luma block is
       predicted from its own MV and those of its neighbours */
    if(s->obmc && s->pict_type != B_TYPE){
        /* mv_cache holds a 3x3 MV neighbourhood (plus padding) around the
           current MB: row/col 0 are the top/left neighbour MBs, rows 1-2 and
           cols 1-2 are this MB's four 8x8 blocks */
        int16_t mv_cache[4][4][2];
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
        const int mot_stride= s->b8_stride;
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;

        assert(!s->mb_skipped);
                
        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
        /* row 3 duplicates this MB's bottom row (the row below is used as its
           own bottom neighbour) */
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);

        /* top neighbour: duplicate our own top row if unavailable or intra */
        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
        }else{
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
        }

        /* left neighbour column */
        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
        }else{
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
        }

        /* right neighbour column */
        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
        }else{
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
        }
        
        mx = 0;
        my = 0;
        for(i=0;i<4;i++) {
            const int x= (i&1)+1;
            const int y= (i>>1)+1;
            /* mv[0] = this block's MV, mv[1..4] = top/left/right/bottom neighbours */
            int16_t mv[5][2]= {
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
            //FIXME cleanup
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                        pix_op[1],
                        mv);

            mx += mv[0][0];
            my += mv[0][1];
        }
        /* chroma uses a single MV derived from the sum of the four luma MVs */
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);

        return;
    }
   
    switch(s->mv_type) {
    case MV_TYPE_16X16:
        if(s->mcsel){
            /* global motion compensation (sprite warping) */
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr, 
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else if(s->mspel){
            /* wmv2 style motion compensation */
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else
        {
            /* plain half-pel motion compensation */
            mpeg_motion(s, dest_y, dest_cb, dest_cr, 
                        0, 0, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }           
        break;
    case MV_TYPE_8X8:
        /* 4MV: one MV per 8x8 luma block; mx/my accumulate them (in half-pel
           units) for the derived chroma MV */
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
                    
                /* WARNING: do no forget half pels */
                src_x = clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;
                    
                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8 
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                /* quarter-pel -> half-pel before summing for the chroma MV */
                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                            ref_picture[0], 0, 0,
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos, s->v_edge_pos,
                            8, 8, pix_op[1],
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* frame picture, field prediction: one MV per field */
            if(s->quarter_sample){
                for(i=0; i<2; i++){
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
                }
            }else{
                /* top field */       
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 0, s->field_select[dir][0],
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 1, s->field_select[dir][1],
                            ref_picture, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        } else {
            /* field picture: the opposite-parity field of a second field
               lives in the current picture, not in the reference */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            } 

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_16X8:
        /* field picture, 16x8 prediction: two MVs, upper and lower half */
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                /* opposite parity of a second field is in the current picture */
                ref2picture= s->current_picture_ptr->data;
            } 

            mpeg_motion(s, dest_y, dest_cb, dest_cr, 
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
                
            dest_y += 16*s->linesize;
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
        }        
        break;
    case MV_TYPE_DMV:
        /* dual prime: each field is predicted from both parities and averaged */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                }
                /* second pass averages onto the first */
                pix_op = s->dsp.avg_pixels_tab; 
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion(s, dest_y, dest_cb, dest_cr, 
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);

                // after put we make avg of the same block
                pix_op=s->dsp.avg_pixels_tab; 

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;    
                }
            }
        }
    break;
    default: assert(0);
    }
}
3465
/**
 * motion compensation of a single macroblock, at reduced (lowres) resolution
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op motion compensation functions (average or put normally), indexed by lowres level
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s, 
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture, 
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;    /* scaled 8x8 block size */

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        /* 4MV: one MV per scaled 8x8 block; mx/my accumulate them for the
           derived chroma MV */
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */       
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* field picture: the opposite-parity field of a second field
               lives in the current picture, not in the reference */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            } 

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        /* field picture, 16x8 prediction: two MVs, upper and lower half */
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                /* opposite parity of a second field is in the current picture */
                ref2picture= s->current_picture_ptr->data;
            } 

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
                
            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }        
        break;
    case MV_TYPE_DMV:
        /* dual prime: each field is predicted from both parities and averaged */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                /* second pass averages onto the first */
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;    
                }
            }
        }
    break;
    default: assert(0);
    }
}
3591
3592 /* put block[] to dest[] */
3593 static inline void put_dct(MpegEncContext *s, 
3594                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3595 {
3596     s->dct_unquantize_intra(s, block, i, qscale);
3597     s->dsp.idct_put (dest, line_size, block);
3598 }
3599
3600 /* add block[] to dest[] */
3601 static inline void add_dct(MpegEncContext *s, 
3602                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3603 {
3604     if (s->block_last_index[i] >= 0) {
3605         s->dsp.idct_add (dest, line_size, block);
3606     }
3607 }
3608
3609 static inline void add_dequant_dct(MpegEncContext *s, 
3610                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3611 {
3612     if (s->block_last_index[i] >= 0) {
3613         s->dct_unquantize_inter(s, block, i, qscale);
3614
3615         s->dsp.idct_add (dest, line_size, block);
3616     }
3617 }
3618
3619 /**
3620  * cleans dc, ac, coded_block for the current non intra MB
3621  */
3622 void ff_clean_intra_table_entries(MpegEncContext *s)
3623 {
3624     int wrap = s->b8_stride;
3625     int xy = s->block_index[0];
3626     
3627     s->dc_val[0][xy           ] = 
3628     s->dc_val[0][xy + 1       ] = 
3629     s->dc_val[0][xy     + wrap] =
3630     s->dc_val[0][xy + 1 + wrap] = 1024;
3631     /* ac pred */
3632     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3633     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3634     if (s->msmpeg4_version>=3) {
3635         s->coded_block[xy           ] =
3636         s->coded_block[xy + 1       ] =
3637         s->coded_block[xy     + wrap] =
3638         s->coded_block[xy + 1 + wrap] = 0;
3639     }
3640     /* chroma */
3641     wrap = s->mb_stride;
3642     xy = s->mb_x + s->mb_y * wrap;
3643     s->dc_val[1][xy] =
3644     s->dc_val[2][xy] = 1024;
3645     /* ac pred */
3646     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3647     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3648     
3649     s->mbintra_table[xy]= 0;
3650 }
3651
3652 /* generic function called after a macroblock has been parsed by the
3653    decoder or after it has been encoded by the encoder.
3654
3655    Important variables used:
3656    s->mb_intra : true if intra macroblock
3657    s->mv_dir   : motion vector direction
3658    s->mv_type  : motion vector type
3659    s->mv       : motion vector
3660    s->interlaced_dct : true if interlaced dct used (mpeg2)
3661  */
3662 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3663 {
3664     int mb_x, mb_y;
3665     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3666 #ifdef HAVE_XVMC
3667     if(s->avctx->xvmc_acceleration){
3668         XVMC_decode_mb(s);//xvmc uses pblocks
3669         return;
3670     }
3671 #endif
3672
3673     mb_x = s->mb_x;
3674     mb_y = s->mb_y;
3675
3676     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3677        /* save DCT coefficients */
3678        int i,j;
3679        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3680        for(i=0; i<6; i++)
3681            for(j=0; j<64; j++)
3682                *dct++ = block[i][s->dsp.idct_permutation[j]];
3683     }
3684
3685     s->current_picture.qscale_table[mb_xy]= s->qscale;
3686
3687     /* update DC predictors for P macroblocks */
3688     if (!s->mb_intra) {
3689         if (s->h263_pred || s->h263_aic) {
3690             if(s->mbintra_table[mb_xy])
3691                 ff_clean_intra_table_entries(s);
3692         } else {
3693             s->last_dc[0] =
3694             s->last_dc[1] =
3695             s->last_dc[2] = 128 << s->intra_dc_precision;
3696         }
3697     }
3698     else if (s->h263_pred || s->h263_aic)
3699         s->mbintra_table[mb_xy]=1;
3700
3701     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3702         uint8_t *dest_y, *dest_cb, *dest_cr;
3703         int dct_linesize, dct_offset;
3704         op_pixels_func (*op_pix)[4];
3705         qpel_mc_func (*op_qpix)[16];
3706         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3707         const int uvlinesize= s->current_picture.linesize[1];
3708         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3709         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3710
3711         /* avoid copy if macroblock skipped in last frame too */
3712         /* skip only during decoding as we might trash the buffers during encoding a bit */
3713         if(!s->encoding){
3714             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3715             const int age= s->current_picture.age;
3716
3717             assert(age);
3718
3719             if (s->mb_skipped) {
3720                 s->mb_skipped= 0;
3721                 assert(s->pict_type!=I_TYPE);
3722  
3723                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3724                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3725
3726                 /* if previous was skipped too, then nothing to do !  */
3727                 if (*mbskip_ptr >= age && s->current_picture.reference){
3728                     return;
3729                 }
3730             } else if(!s->current_picture.reference){
3731                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3732                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3733             } else{
3734                 *mbskip_ptr = 0; /* not skipped */
3735             }
3736         }
3737         
3738         dct_linesize = linesize << s->interlaced_dct;
3739         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3740         
3741         if(readable){
3742             dest_y=  s->dest[0];
3743             dest_cb= s->dest[1];
3744             dest_cr= s->dest[2];
3745         }else{
3746             dest_y = s->b_scratchpad;
3747             dest_cb= s->b_scratchpad+16*linesize;
3748             dest_cr= s->b_scratchpad+32*linesize;
3749         }
3750
3751         if (!s->mb_intra) {
3752             /* motion handling */
3753             /* decoding or more than one mb_type (MC was already done otherwise) */
3754             if(!s->encoding){
3755                 if(lowres_flag){
3756                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3757
3758                     if (s->mv_dir & MV_DIR_FORWARD) {
3759                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3760                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3761                     }
3762                     if (s->mv_dir & MV_DIR_BACKWARD) {
3763                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3764                     }
3765                 }else{
3766                     if ((!s->no_rounding) || s->pict_type==B_TYPE){                
3767                         op_pix = s->dsp.put_pixels_tab;
3768                         op_qpix= s->dsp.put_qpel_pixels_tab;
3769                     }else{
3770                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3771                         op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3772                     }
3773                     if (s->mv_dir & MV_DIR_FORWARD) {
3774                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3775                         op_pix = s->dsp.avg_pixels_tab;
3776                         op_qpix= s->dsp.avg_qpel_pixels_tab;
3777                     }
3778                     if (s->mv_dir & MV_DIR_BACKWARD) {
3779                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3780                     }
3781                 }
3782             }
3783
3784             /* skip dequant / idct if we are really late ;) */
3785             if(s->hurry_up>1) goto skip_idct;
3786             if(s->avctx->skip_idct){
3787                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
3788                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
3789                    || s->avctx->skip_idct >= AVDISCARD_ALL)
3790                     goto skip_idct;
3791             }
3792
3793             /* add dct residue */
3794             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3795                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3796                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3797                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3798                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3799                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3800
3801                 if(!(s->flags&CODEC_FLAG_GRAY)){
3802                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3803                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3804                 }
3805             } else if(s->codec_id != CODEC_ID_WMV2){
3806                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
3807                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
3808                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
3809                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
3810
3811                 if(!(s->flags&CODEC_FLAG_GRAY)){
3812                     if(s->chroma_y_shift){//Chroma420
3813                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3814                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
3815                     }else{
3816                         //chroma422
3817                         dct_linesize = uvlinesize << s->interlaced_dct;
3818                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3819
3820                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
3821                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
3822                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
3823                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
3824                         if(!s->chroma_x_shift){//Chroma444
3825                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
3826                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
3827                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
3828                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
3829                         }
3830                     }
3831                 }//fi gray
3832             }
3833             else{
3834                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
3835             }
3836         } else {
3837             /* dct only in intra block */
3838             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
3839                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3840                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3841                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3842                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3843
3844                 if(!(s->flags&CODEC_FLAG_GRAY)){
3845                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3846                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3847                 }
3848             }else{
3849                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
3850                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
3851                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
3852                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
3853
3854                 if(!(s->flags&CODEC_FLAG_GRAY)){
3855                     if(s->chroma_y_shift){
3856                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
3857                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
3858                     }else{
3859
3860                         dct_linesize = uvlinesize << s->interlaced_dct;
3861                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3862
3863                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
3864                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
3865                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
3866                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
3867                         if(!s->chroma_x_shift){//Chroma444
3868                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
3869                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
3870                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
3871                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
3872                         }
3873                     }
3874                 }//gray
3875             }
3876         }
3877 skip_idct:
3878         if(!readable){
3879             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
3880             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
3881             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
3882         }
3883     }
3884 }
3885
3886 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
3887     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
3888     else                  MPV_decode_mb_internal(s, block, 0);
3889 }
3890
3891 #ifdef CONFIG_ENCODERS
3892
3893 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
3894 {
3895     static const char tab[64]=
3896         {3,2,2,1,1,1,1,1,
3897          1,1,1,1,1,1,1,1,
3898          1,1,1,1,1,1,1,1,
3899          0,0,0,0,0,0,0,0,
3900          0,0,0,0,0,0,0,0,
3901          0,0,0,0,0,0,0,0,
3902          0,0,0,0,0,0,0,0,
3903          0,0,0,0,0,0,0,0};
3904     int score=0;
3905     int run=0;
3906     int i;
3907     DCTELEM *block= s->block[n];
3908     const int last_index= s->block_last_index[n];
3909     int skip_dc;
3910
3911     if(threshold<0){
3912         skip_dc=0;
3913         threshold= -threshold;
3914     }else
3915         skip_dc=1;
3916
3917     /* are all which we could set to zero are allready zero? */
3918     if(last_index<=skip_dc - 1) return;
3919
3920     for(i=0; i<=last_index; i++){
3921         const int j = s->intra_scantable.permutated[i];
3922         const int level = ABS(block[j]);
3923         if(level==1){
3924             if(skip_dc && i==0) continue;
3925             score+= tab[run];
3926             run=0;
3927         }else if(level>1){
3928             return;
3929         }else{
3930             run++;
3931         }
3932     }
3933     if(score >= threshold) return;
3934     for(i=skip_dc; i<=last_index; i++){
3935         const int j = s->intra_scantable.permutated[i];
3936         block[j]=0;
3937     }
3938     if(block[0]) s->block_last_index[n]= 0;
3939     else         s->block_last_index[n]= -1;
3940 }
3941
3942 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
3943 {
3944     int i;
3945     const int maxlevel= s->max_qcoeff;
3946     const int minlevel= s->min_qcoeff;
3947     int overflow=0;
3948     
3949     if(s->mb_intra){
3950         i=1; //skip clipping of intra dc
3951     }else
3952         i=0;
3953     
3954     for(;i<=last_index; i++){
3955         const int j= s->intra_scantable.permutated[i];
3956         int level = block[j];
3957        
3958         if     (level>maxlevel){
3959             level=maxlevel;
3960             overflow++;
3961         }else if(level<minlevel){
3962             level=minlevel;
3963             overflow++;
3964         }
3965         
3966         block[j]= level;
3967     }
3968     
3969     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
3970         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
3971 }
3972
3973 #endif //CONFIG_ENCODERS
3974
3975 /**
3976  *
3977  * @param h is the normal height, this will be reduced automatically if needed for the last row
3978  */
3979 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
3980     if (s->avctx->draw_horiz_band) {
3981         AVFrame *src;
3982         int offset[4];
3983         
3984         if(s->picture_structure != PICT_FRAME){
3985             h <<= 1;
3986             y <<= 1;
3987             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
3988         }
3989
3990         h= FFMIN(h, s->avctx->height - y);
3991
3992         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) 
3993             src= (AVFrame*)s->current_picture_ptr;
3994         else if(s->last_picture_ptr)
3995             src= (AVFrame*)s->last_picture_ptr;
3996         else
3997             return;
3998             
3999         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4000             offset[0]=
4001             offset[1]=
4002             offset[2]=
4003             offset[3]= 0;
4004         }else{
4005             offset[0]= y * s->linesize;;
4006             offset[1]= 
4007             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4008             offset[3]= 0;
4009         }
4010
4011         emms_c();
4012
4013         s->avctx->draw_horiz_band(s->avctx, src, offset,
4014                                   y, s->picture_structure, h);
4015     }
4016 }
4017
4018 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4019     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4020     const int uvlinesize= s->current_picture.linesize[1];
4021     const int mb_size= 4 - s->avctx->lowres;
4022         
4023     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4024     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4025     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4026     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4027     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4028     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4029     //block_index is not used by mpeg2, so it is not affected by chroma_format
4030
4031     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4032     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4033     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4034
4035     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4036     {
4037         s->dest[0] += s->mb_y *   linesize << mb_size;
4038         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4039         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4040     }
4041 }
4042
4043 #ifdef CONFIG_ENCODERS
4044
/**
 * Fill weight[64] with a per-pixel activity measure for one 8x8 block:
 * 36 * stddev-like term of the 3x3 neighbourhood (clipped at the block
 * borders), i.e. 36*sqrt(count*sqr - sum^2)/count.
 */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
//FIXME optimize
    int bx, by;

    for(by=0; by<8; by++){
        for(bx=0; bx<8; bx++){
            int sum=0, sqr=0, count=0;
            int xx, yy;

            /* 3x3 neighbourhood, clipped to the 8x8 block */
            for(yy= FFMAX(by-1, 0); yy < FFMIN(8, by+2); yy++){
                for(xx= FFMAX(bx-1, 0); xx < FFMIN(8, bx+2); xx++){
                    const int v= ptr[xx + yy*stride];
                    sum += v;
                    sqr += v*v;
                    count++;
                }
            }
            weight[bx + 8*by]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
        }
    }
}
4067
/**
 * Encode one macroblock: set up the quantizer, fetch/motion-compensate the
 * source pixels, transform+quantize the six 8x8 blocks, apply optional
 * coefficient elimination, then entropy-code with the codec specific coder.
 *
 * @param motion_x x component of the motion vector passed to the MB coder
 * @param motion_y y component of the motion vector passed to the MB coder
 */
static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
    int16_t weight[6][64];
    DCTELEM orig[6][64];
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    int i;
    int skip_dct[6];
    int dct_offset   = s->linesize*8; //default for progressive frames
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int wrap_y, wrap_c;
    
    for(i=0; i<6; i++) skip_dct[i]=0;
    
    if(s->adaptive_quant){
        const int last_qp= s->qscale;
        const int mb_xy= mb_x + mb_y*s->mb_stride;

        s->lambda= s->lambda_table[mb_xy];
        update_qscale(s);
    
        if(!(s->flags&CODEC_FLAG_QP_RD)){
            s->dquant= s->qscale - last_qp;

            if(s->out_format==FMT_H263){
                /* H.263 style syntax only allows qscale deltas of +-2 */
                s->dquant= clip(s->dquant, -2, 2); //FIXME RD
            
                if(s->codec_id==CODEC_ID_MPEG4){        
                    if(!s->mb_intra){
                        if(s->pict_type == B_TYPE){
                            /* MPEG-4 B frames: dquant must be even, and
                               direct-mode MBs cannot change qscale at all */
                            if(s->dquant&1) 
                                s->dquant= (s->dquant/2)*2;
                            if(s->mv_dir&MV_DIRECT)
                                s->dquant= 0;
                        }
                        /* 4MV macroblocks cannot carry a qscale change */
                        if(s->mv_type==MV_TYPE_8X8)
                            s->dquant=0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    }else if(s->flags&CODEC_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    /* source pointers into the picture being encoded */
    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
    ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
    ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;

    if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
        /* MB sticks out over the right/bottom border: read the pixels
           through the padded edge emulation buffer instead */
        uint8_t *ebuf= s->edge_emu_buffer + 32;
        ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
        ptr_y= ebuf;
        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cb= ebuf+18*wrap_y;
        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cr= ebuf+18*wrap_y+8;
    }

    if (s->mb_intra) {
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            /* decide between frame (progressive) and field (interlaced) DCT
               by comparing the ildct_cmp score of both orderings; the -400
               biases the decision towards progressive */
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8) 
                              +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;

            if(progressive_score > 0){
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8) 
                                  +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;
            
                    /* field DCT: rows interleave, so double the stride and
                       start the bottom blocks one line down */
                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }
        
        /* intra: transform the source pixels directly */
        s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
        }
    }else{
        /* inter: motion compensate into s->dest, then transform the
           prediction residual */
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        if ((!s->no_rounding) || s->pict_type==B_TYPE){
            op_pix = s->dsp.put_pixels_tab;
            op_qpix= s->dsp.put_qpel_pixels_tab;
        }else{
            op_pix = s->dsp.put_no_rnd_pixels_tab;
            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        if (s->mv_dir & MV_DIR_FORWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
            /* a second (backward) prediction is averaged on top */
            op_pix = s->dsp.avg_pixels_tab;
            op_qpix= s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
        }

        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            /* same frame/field DCT decision as above, but scored on the
               residual (source vs prediction) */
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8) 
                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
            
            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;

            if(progressive_score>0){
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8) 
                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
            
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;
            
                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }
        
        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
        
        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
        }
        /* pre quantization: skip the DCT of blocks whose residual SAD is so
           small that it would quantize to nothing anyway */
        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
            //FIXME optimize
            if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
        }
    }

    if(s->avctx->quantizer_noise_shaping){
        /* keep per-pixel weights and the unquantized coefficients around for
           the refinement pass below */
        if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
        if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
        if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
        if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
    }
            
    /* DCT & quantize */
    assert(s->out_format!=FMT_MJPEG || s->qscale==8);
    {
        for(i=0;i<6;i++) {
            if(!skip_dct[i]){
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
            // FIXME we could decide to change to quantizer instead of clipping
            // JS: I don't think that would be a good idea it could lower quality instead
            //     of improve it. Just INTRADC clipping deserves changes in quantizer
                if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
            }else
                s->block_last_index[i]= -1;
        }
        if(s->avctx->quantizer_noise_shaping){
            for(i=0;i<6;i++) {
                if(!skip_dct[i]){
                    s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
                }
            }
        }
        
        /* drop nearly-empty inter blocks that are not worth their bits */
        if(s->luma_elim_threshold && !s->mb_intra)
            for(i=0; i<4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if(s->chroma_elim_threshold && !s->mb_intra)
            for(i=4; i<6; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if(s->flags & CODEC_FLAG_CBP_RD){
            for(i=0;i<6;i++) {
                if(s->block_last_index[i] == -1)
                    s->coded_score[i]= INT_MAX/256;
            }
        }
    }

    if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
        /* grayscale intra: code flat chroma (DC only, mid gray) */
        s->block_last_index[4]=
        s->block_last_index[5]= 0;
        s->block[4][0]=
        s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
    }

    //non c quantize code returns incorrect block_last_index FIXME
    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
        for(i=0; i<6; i++){
            int j;
            if(s->block_last_index[i]>0){
                for(j=63; j>0; j--){
                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
                }
                s->block_last_index[i]= j;
            }
        }
    }

    /* huffman encode */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case CODEC_ID_MPEG1VIDEO:
    case CODEC_ID_MPEG2VIDEO:
        mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MPEG4:
        mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MSMPEG4V2:
    case CODEC_ID_MSMPEG4V3:
    case CODEC_ID_WMV1:
        msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_WMV2:
         ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
#ifdef CONFIG_H261_ENCODER
    case CODEC_ID_H261:
        ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
#endif
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
    case CODEC_ID_RV10:
    case CODEC_ID_RV20:
        h263_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MJPEG:
        mjpeg_encode_mb(s, s->block); break;
    default:
        assert(0);
    }
}
4329
4330 #endif //CONFIG_ENCODERS
4331
4332 void ff_mpeg_flush(AVCodecContext *avctx){
4333     int i;
4334     MpegEncContext *s = avctx->priv_data;
4335     
4336     if(s==NULL || s->picture==NULL) 
4337         return;
4338     
4339     for(i=0; i<MAX_PICTURE_COUNT; i++){
4340        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4341                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4342         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4343     }
4344     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4345     
4346     s->mb_x= s->mb_y= 0;
4347     
4348     s->parse_context.state= -1;
4349     s->parse_context.frame_start_found= 0;
4350     s->parse_context.overread= 0;
4351     s->parse_context.overread_index= 0;
4352     s->parse_context.index= 0;
4353     s->parse_context.last_index= 0;
4354     s->bitstream_buffer_size=0;
4355 }
4356
4357 #ifdef CONFIG_ENCODERS
4358 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4359 {
4360     const uint16_t *srcw= (uint16_t*)src;
4361     int words= length>>4;
4362     int bits= length&15;
4363     int i;
4364
4365     if(length==0) return;
4366     
4367     if(words < 16){
4368         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4369     }else if(put_bits_count(pb)&7){
4370         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4371     }else{
4372         for(i=0; put_bits_count(pb)&31; i++)
4373             put_bits(pb, 8, src[i]);
4374         flush_put_bits(pb);
4375         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4376         skip_put_bytes(pb, 2*words-i);
4377     }
4378         
4379     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4380 }
4381
4382 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4383     int i;
4384
4385     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4386
4387     /* mpeg1 */
4388     d->mb_skip_run= s->mb_skip_run;
4389     for(i=0; i<3; i++)
4390         d->last_dc[i]= s->last_dc[i];
4391     
4392     /* statistics */
4393     d->mv_bits= s->mv_bits;
4394     d->i_tex_bits= s->i_tex_bits;
4395     d->p_tex_bits= s->p_tex_bits;
4396     d->i_count= s->i_count;
4397     d->f_count= s->f_count;
4398     d->b_count= s->b_count;
4399     d->skip_count= s->skip_count;
4400     d->misc_bits= s->misc_bits;
4401     d->last_bits= 0;
4402
4403     d->mb_skipped= 0;
4404     d->qscale= s->qscale;
4405     d->dquant= s->dquant;
4406 }
4407
4408 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4409     int i;
4410
4411     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
4412     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4413     
4414     /* mpeg1 */
4415     d->mb_skip_run= s->mb_skip_run;
4416     for(i=0; i<3; i++)
4417         d->last_dc[i]= s->last_dc[i];
4418     
4419     /* statistics */
4420     d->mv_bits= s->mv_bits;
4421     d->i_tex_bits= s->i_tex_bits;
4422     d->p_tex_bits= s->p_tex_bits;
4423     d->i_count= s->i_count;
4424     d->f_count= s->f_count;
4425     d->b_count= s->b_count;
4426     d->skip_count= s->skip_count;
4427     d->misc_bits= s->misc_bits;
4428
4429     d->mb_intra= s->mb_intra;
4430     d->mb_skipped= s->mb_skipped;
4431     d->mv_type= s->mv_type;
4432     d->mv_dir= s->mv_dir;
4433     d->pb= s->pb;
4434     if(s->data_partitioning){
4435         d->pb2= s->pb2;
4436         d->tex_pb= s->tex_pb;
4437     }
4438     d->block= s->block;
4439     for(i=0; i<6; i++)
4440         d->block_last_index[i]= s->block_last_index[i];
4441     d->interlaced_dct= s->interlaced_dct;
4442     d->qscale= s->qscale;
4443 }
4444
/**
 * Encode the current macroblock with one candidate coding choice (mb type)
 * and keep the result if its score beats the best found so far.
 *
 * Trials alternate between two scratch bitstream/block sets indexed by
 * *next_block; the winner's state is copied into best.
 *
 * @param backup     clean pre-encode state to restore before the trial
 * @param best       receives the encoder state of the best choice so far
 * @param dmin       in/out: best (lowest) score so far
 * @param next_block in/out: which of the two scratch sets to encode into
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int score;
    uint8_t *dest_backup[3];
    
    copy_context_before_encode(s, backup, type);

    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }
    
    if(*next_block){
        /* second scratch set: also reconstruct into the scratchpad so the
           currently-best reconstruction in s->dest is not overwritten */
        memcpy(dest_backup, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);
    
    /* base score: bits produced (all partitions) */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }
   
    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        /* full RD: reconstruct and add lambda-weighted distortion */
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }
    
    if(*next_block){
        memcpy(s->dest, dest_backup, sizeof(s->dest));
    }

    if(score<*dmin){
        *dmin= score;
        /* flip so the next trial uses the other scratch set, keeping this
           winner's output intact */
        *next_block^=1;

        copy_context_after_encode(best, s, type);
    }
}
4495                 
4496 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4497     uint32_t *sq = squareTbl + 256;
4498     int acc=0;
4499     int x,y;
4500     
4501     if(w==16 && h==16) 
4502         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4503     else if(w==8 && h==8)
4504         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4505     
4506     for(y=0; y<h; y++){
4507         for(x=0; x<w; x++){
4508             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4509         } 
4510     }
4511     
4512     assert(acc>=0);
4513     
4514     return acc;
4515 }
4516
4517 static int sse_mb(MpegEncContext *s){
4518     int w= 16;
4519     int h= 16;
4520
4521     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4522     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4523
4524     if(w==16 && h==16)
4525       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4526         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4527                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4528                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4529       }else{
4530         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4531                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4532                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4533       }
4534     else
4535         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4536                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4537                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4538 }
4539
4540 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4541     MpegEncContext *s= arg;
4542
4543     
4544     s->me.pre_pass=1;
4545     s->me.dia_size= s->avctx->pre_dia_size;
4546     s->first_slice_line=1;
4547     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4548         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4549             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4550         }
4551         s->first_slice_line=0;
4552     }
4553     
4554     s->me.pre_pass=0;
4555     
4556     return 0;
4557 }
4558
4559 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4560     MpegEncContext *s= arg;
4561
4562     s->me.dia_size= s->avctx->dia_size;
4563     s->first_slice_line=1;
4564     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4565         s->mb_x=0; //for block init below
4566         ff_init_block_index(s);
4567         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4568             s->block_index[0]+=2;
4569             s->block_index[1]+=2;
4570             s->block_index[2]+=2;
4571             s->block_index[3]+=2;
4572             
4573             /* compute motion vector & mb_type and store in context */
4574             if(s->pict_type==B_TYPE)
4575                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4576             else
4577                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4578         }
4579         s->first_slice_line=0;
4580     }
4581     return 0;
4582 }
4583
4584 static int mb_var_thread(AVCodecContext *c, void *arg){
4585     MpegEncContext *s= arg;
4586     int mb_x, mb_y;
4587
4588     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4589         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4590             int xx = mb_x * 16;
4591             int yy = mb_y * 16;
4592             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4593             int varc;
4594             int sum = s->dsp.pix_sum(pix, s->linesize);
4595     
4596             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4597
4598             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4599             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4600             s->me.mb_var_sum_temp    += varc;
4601         }
4602     }
4603     return 0;
4604 }
4605
4606 static void write_slice_end(MpegEncContext *s){
4607     if(s->codec_id==CODEC_ID_MPEG4){
4608         if(s->partitioned_frame){
4609             ff_mpeg4_merge_partitions(s);
4610         }
4611     
4612         ff_mpeg4_stuffing(&s->pb);
4613     }else if(s->out_format == FMT_MJPEG){
4614         ff_mjpeg_stuffing(&s->pb);
4615     }
4616
4617     align_put_bits(&s->pb);
4618     flush_put_bits(&s->pb);
4619     
4620     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4621         s->misc_bits+= get_bits_diff(s);
4622 }
4623
/**
 * Slice encoding worker: encodes all macroblocks in rows
 * [s->start_mb_y, s->end_mb_y) of the current picture into s->pb.
 * Handles RD macroblock mode decision, GOB / video packet headers,
 * data partitioning and PSNR accumulation.
 *
 * @return 0 on success, -1 if the output buffer would overflow.
 */
static int encode_thread(AVCodecContext *c, void *arg){
    MpegEncContext *s= arg;
    int mb_x, mb_y, pdif = 0;
    int i, j;
    MpegEncContext best_s, backup_s;
    /* double-buffered bitstreams: index next_block^1 holds the best
     * candidate so far, the other one receives the current trial */
    uint8_t bit_buf[2][MAX_MB_BYTES];
    uint8_t bit_buf2[2][MAX_MB_BYTES];
    uint8_t bit_buf_tex[2][MAX_MB_BYTES];
    PutBitContext pb[2], pb2[2], tex_pb[2];
//printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);

    for(i=0; i<2; i++){
        init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
        init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
        init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
    }

    /* reset per-slice bit statistics */
    s->last_bits= put_bits_count(&s->pb);
    s->mv_bits=0;
    s->misc_bits=0;
    s->i_tex_bits=0;
    s->p_tex_bits=0;
    s->i_count=0;
    s->f_count=0;
    s->b_count=0;
    s->skip_count=0;

    for(i=0; i<3; i++){
        /* init last dc values */
        /* note: quant matrix value (8) is implied here */
        s->last_dc[i] = 128 << s->intra_dc_precision;

        s->current_picture_ptr->error[i] = 0;
    }
    s->mb_skip_run = 0;
    memset(s->last_mv, 0, sizeof(s->last_mv));

    s->last_mv_dir = 0;

    /* codec specific per-slice setup */
    switch(s->codec_id){
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
        s->gob_index = ff_h263_get_gob_height(s);
        break;
    case CODEC_ID_MPEG4:
        if(s->partitioned_frame)
            ff_mpeg4_init_partitions(s);
        break;
    }

    s->resync_mb_x=0;
    s->resync_mb_y=0;
    s->first_slice_line = 1;
    s->ptr_lastgob = s->pb.buf;
    for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
//    printf("row %d at %X\n", s->mb_y, (int)s);
        s->mb_x=0;
        s->mb_y= mb_y;

        ff_set_qscale(s, s->qscale);
        ff_init_block_index(s);

        for(mb_x=0; mb_x < s->mb_width; mb_x++) {
            int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
            int mb_type= s->mb_type[xy];
//            int d;
            int dmin= INT_MAX;
            int dir;

            /* bail out before the bitstream buffers can overflow */
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
                return -1;
            }
            if(s->data_partitioning){
                if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
                   || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
                    av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
                    return -1;
                }
            }

            s->mb_x = mb_x;
            s->mb_y = mb_y;  // moved into loop, can get changed by H.261
            ff_update_block_index(s);

#ifdef CONFIG_H261_ENCODER
            /* H.261 encodes macroblocks in its own GOB order; refresh the
             * table index after the reorder */
            if(s->codec_id == CODEC_ID_H261){
                ff_h261_reorder_mb_index(s);
                xy= s->mb_y*s->mb_stride + s->mb_x;
                mb_type= s->mb_type[xy];
            }
#endif

            /* write gob / video packet header  */
            if(s->rtp_mode){
                int current_packet_size, is_gob_start;

                current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);

                is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;

                if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;

                /* codec specific restrictions on where a GOB/slice may start */
                switch(s->codec_id){
                case CODEC_ID_H263:
                case CODEC_ID_H263P:
                    if(!s->h263_slice_structured)
                        if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
                    break;
                case CODEC_ID_MPEG2VIDEO:
                    if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
                    /* fallthrough: the skip-run restriction applies to both */
                case CODEC_ID_MPEG1VIDEO:
                    if(s->mb_skip_run) is_gob_start=0;
                    break;
                }

                if(is_gob_start){
                    if(s->start_mb_y != mb_y || mb_x!=0){
                        write_slice_end(s);

                        if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
                            ff_mpeg4_init_partitions(s);
                        }
                    }

                    assert((put_bits_count(&s->pb)&7) == 0);
                    current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;

                    /* error_rate: deliberately drop packets for testing */
                    if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
                        int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
                        int d= 100 / s->avctx->error_rate;
                        if(r % d == 0){
                            current_packet_size=0;
#ifndef ALT_BITSTREAM_WRITER
                            s->pb.buf_ptr= s->ptr_lastgob;
#endif
                            assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
                        }
                    }

                    if (s->avctx->rtp_callback){
                        int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
                        s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
                    }

                    /* emit the codec specific resync header */
                    switch(s->codec_id){
                    case CODEC_ID_MPEG4:
                        ff_mpeg4_encode_video_packet_header(s);
                        ff_mpeg4_clean_buffers(s);
                    break;
                    case CODEC_ID_MPEG1VIDEO:
                    case CODEC_ID_MPEG2VIDEO:
                        ff_mpeg1_encode_slice_header(s);
                        ff_mpeg1_clean_buffers(s);
                    break;
                    case CODEC_ID_H263:
                    case CODEC_ID_H263P:
                        h263_encode_gob_header(s, mb_y);
                    break;
                    }

                    if(s->flags&CODEC_FLAG_PASS1){
                        int bits= put_bits_count(&s->pb);
                        s->misc_bits+= bits - s->last_bits;
                        s->last_bits= bits;
                    }

                    s->ptr_lastgob += current_packet_size;
                    s->first_slice_line=1;
                    s->resync_mb_x=mb_x;
                    s->resync_mb_y=mb_y;
                }
            }

            if(  (s->resync_mb_x   == s->mb_x)
               && s->resync_mb_y+1 == s->mb_y){
                s->first_slice_line=0;
            }

            s->mb_skipped=0;
            s->dquant=0; //only for QP_RD

            if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
                int next_block=0;
                int pb_bits_count, pb2_bits_count, tex_pb_bits_count;

                /* RD mode decision: try each candidate MB type via
                 * encode_mb_hq() and keep the one with the best score */
                copy_context_before_encode(&backup_s, s, -1);
                backup_s.pb= s->pb;
                best_s.data_partitioning= s->data_partitioning;
                best_s.partitioned_frame= s->partitioned_frame;
                if(s->data_partitioning){
                    backup_s.pb2= s->pb2;
                    backup_s.tex_pb= s->tex_pb;
                }

                if(mb_type&CANDIDATE_MB_TYPE_INTER){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = s->p_mv_table[xy][0];
                    s->mv[0][0][1] = s->p_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                }
                if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = 0;
                    s->mv[0][0][1] = 0;
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                }
                if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_8X8;
                    s->mb_intra= 0;
                    for(i=0; i<4; i++){
                        s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
                        s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                }
                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[1][0][0] = s->b_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_back_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
                                 &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
                }
                if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
                    int mx= s->b_direct_mv_table[xy][0];
                    int my= s->b_direct_mv_table[xy][1];

                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
                    s->mb_intra= 0;
                    ff_mpeg4_set_direct_mv(s, mx, my);
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
                                 &dmin, &next_block, mx, my);
                }
                if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(dir=0; dir<2; dir++){
                        for(i=0; i<2; i++){
                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
                        }
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_INTRA){
                    s->mv_dir = 0;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 1;
                    s->mv[0][0][0] = 0;
                    s->mv[0][0][1] = 0;
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                    if(s->h263_pred || s->h263_aic){
                        if(best_s.mb_intra)
                            s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
                        else
                            ff_clean_intra_table_entries(s); //old mode?
                    }
                }

                /* QP_RD: additionally search nearby quantizers around the
                 * best mode found so far */
                if(s->flags & CODEC_FLAG_QP_RD){
                    if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
                        const int last_qp= backup_s.qscale;
                        int dquant, dir, qp, dc[6];
                        DCTELEM ac[6][16];
                        const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;

                        assert(backup_s.dquant == 0);

                        //FIXME intra
                        s->mv_dir= best_s.mv_dir;
                        s->mv_type = MV_TYPE_16X16;
                        s->mb_intra= best_s.mb_intra;
                        s->mv[0][0][0] = best_s.mv[0][0][0];
                        s->mv[0][0][1] = best_s.mv[0][0][1];
                        s->mv[1][0][0] = best_s.mv[1][0][0];
                        s->mv[1][0][1] = best_s.mv[1][0][1];

                        dir= s->pict_type == B_TYPE ? 2 : 1;
                        if(last_qp + dir > s->avctx->qmax) dir= -dir;
                        for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
                            qp= last_qp + dquant;
                            if(qp < s->avctx->qmin || qp > s->avctx->qmax)
                                break;
                            backup_s.dquant= dquant;
                            /* save DC/AC prediction state so a rejected
                             * trial can be rolled back */
                            if(s->mb_intra && s->dc_val[0]){
                                for(i=0; i<6; i++){
                                    dc[i]= s->dc_val[0][ s->block_index[i] ];
                                    memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
                                }
                            }

                            encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
                                         &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
                            if(best_s.qscale != qp){
                                if(s->mb_intra && s->dc_val[0]){
                                    for(i=0; i<6; i++){
                                        s->dc_val[0][ s->block_index[i] ]= dc[i];
                                        memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
                                    }
                                }
                                if(dir > 0 && dquant==dir){
                                    dquant= 0;
                                    dir= -dir;
                                }else
                                    break;
                            }
                        }
                        qp= best_s.qscale;
                        s->current_picture.qscale_table[xy]= qp;
                    }
                }

                /* commit the winning candidate: restore its context and
                 * copy its bits into the real bitstream(s) */
                copy_context_after_encode(s, &best_s, -1);

                pb_bits_count= put_bits_count(&s->pb);
                flush_put_bits(&s->pb);
                ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
                s->pb= backup_s.pb;

                if(s->data_partitioning){
                    pb2_bits_count= put_bits_count(&s->pb2);
                    flush_put_bits(&s->pb2);
                    ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
                    s->pb2= backup_s.pb2;

                    tex_pb_bits_count= put_bits_count(&s->tex_pb);
                    flush_put_bits(&s->tex_pb);
                    ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
                    s->tex_pb= backup_s.tex_pb;
                }
                s->last_bits= put_bits_count(&s->pb);

                if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
                    ff_h263_update_motion_val(s);

                if(next_block==0){ //FIXME 16 vs linesize16
                    s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
                    s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
                    s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
                }

                if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
                    MPV_decode_mb(s, s->block);
            } else {
                int motion_x, motion_y;
                s->mv_type=MV_TYPE_16X16;
                // only one MB-Type possible

                switch(mb_type){
                case CANDIDATE_MB_TYPE_INTRA:
                    s->mv_dir = 0;
                    s->mb_intra= 1;
                    motion_x= s->mv[0][0][0] = 0;
                    motion_y= s->mv[0][0][1] = 0;
                    break;
                case CANDIDATE_MB_TYPE_INTER:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mb_intra= 0;
                    motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
                    motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
                    break;
                case CANDIDATE_MB_TYPE_INTER_I:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
                    }
                    motion_x = motion_y = 0;
                    break;
                case CANDIDATE_MB_TYPE_INTER4V:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_8X8;
                    s->mb_intra= 0;
                    for(i=0; i<4; i++){
                        s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
                        s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
                    }
                    motion_x= motion_y= 0;
                    break;
                case CANDIDATE_MB_TYPE_DIRECT:
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
                    s->mb_intra= 0;
                    motion_x=s->b_direct_mv_table[xy][0];
                    motion_y=s->b_direct_mv_table[xy][1];
                    ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
                    break;
                case CANDIDATE_MB_TYPE_BIDIR:
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mb_intra= 0;
                    motion_x=0;
                    motion_y=0;
                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
                    break;
                case CANDIDATE_MB_TYPE_BACKWARD:
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mb_intra= 0;
                    motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
                    motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
                    break;
                case CANDIDATE_MB_TYPE_FORWARD:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mb_intra= 0;
                    motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
                    motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
//                    printf(" %d %d ", motion_x, motion_y);
                    break;
                case CANDIDATE_MB_TYPE_FORWARD_I:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
                    }
                    motion_x=motion_y=0;
                    break;
                case CANDIDATE_MB_TYPE_BACKWARD_I:
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
                    }
                    motion_x=motion_y=0;
                    break;
                case CANDIDATE_MB_TYPE_BIDIR_I:
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(dir=0; dir<2; dir++){
                        for(i=0; i<2; i++){
                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
                        }
                    }
                    motion_x=motion_y=0;
                    break;
                default:
                    motion_x=motion_y=0; //gcc warning fix
                    av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
                }

                encode_mb(s, motion_x, motion_y);

                // RAL: Update last macroblock type
                s->last_mv_dir = s->mv_dir;

                if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
                    ff_h263_update_motion_val(s);

                MPV_decode_mb(s, s->block);
            }

            /* clean the MV table in IPS frames for direct mode in B frames */
            if(s->mb_intra /* && I,P,S_TYPE */){
                s->p_mv_table[xy][0]=0;
                s->p_mv_table[xy][1]=0;
            }

            /* accumulate per-plane squared error for PSNR reporting */
            if(s->flags&CODEC_FLAG_PSNR){
                int w= 16;
                int h= 16;

                if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
                if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;

                s->current_picture_ptr->error[0] += sse(
                    s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                    s->dest[0], w, h, s->linesize);
                s->current_picture_ptr->error[1] += sse(
                    s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                    s->dest[1], w>>1, h>>1, s->uvlinesize);
                s->current_picture_ptr->error[2] += sse(
                    s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                    s->dest[2], w>>1, h>>1, s->uvlinesize);
            }
            if(s->loop_filter){
                if(s->out_format == FMT_H263)
                    ff_h263_loop_filter(s);
            }
//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
        }
    }

    //not beautiful here but we must write it before flushing so it has to be here
    if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
        msmpeg4_encode_ext_header(s);

    write_slice_end(s);

    /* Send the last GOB if RTP */
    if (s->avctx->rtp_callback) {
        int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
        pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
        /* Call the RTP callback to send the last GOB */
        emms_c();
        s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
    }

    return 0;
}
5206
5207 #define MERGE(field) dst->field += src->field; src->field=0
5208 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5209     MERGE(me.scene_change_score);
5210     MERGE(me.mc_mb_var_sum_temp);
5211     MERGE(me.mb_var_sum_temp);
5212 }
5213
5214 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5215     int i;
5216
5217     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5218     MERGE(dct_count[1]);
5219     MERGE(mv_bits);
5220     MERGE(i_tex_bits);
5221     MERGE(p_tex_bits);
5222     MERGE(i_count);
5223     MERGE(f_count);
5224     MERGE(b_count);
5225     MERGE(skip_count);
5226     MERGE(misc_bits);
5227     MERGE(error_count);
5228     MERGE(padding_bug_score);
5229
5230     if(dst->avctx->noise_reduction){
5231         for(i=0; i<64; i++){
5232             MERGE(dct_error_sum[0][i]);
5233             MERGE(dct_error_sum[1][i]);
5234         }
5235     }
5236     
5237     assert(put_bits_count(&src->pb) % 8 ==0);
5238     assert(put_bits_count(&dst->pb) % 8 ==0);
5239     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5240     flush_put_bits(&dst->pb);
5241 }
5242
/**
 * Encode one picture: run motion estimation across the configured slice
 * threads, possibly retype the picture on scene change, pick f_code/b_code,
 * clean up motion vectors, choose qscale, write the picture header and then
 * encode all macroblocks via encode_thread().
 *
 * @param s              main encoder context
 * @param picture_number display/coding number stored into the context and
 *                       passed to the per-format header writers
 */
static void encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;

    s->picture_number = picture_number;
    
    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
        
    s->me.scene_change_score=0;
    
//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration
    
    /* rounding control: I frames reset it, P/S frames toggle it for codecs
       that use flipflop rounding (avoids drift accumulation) */
    if(s->pict_type==I_TYPE){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=B_TYPE){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;          
    }
    
    s->mb_intra=0; //for the rate distortion & bit compare functions
    /* propagate the per-picture state into each slice-thread context */
    for(i=1; i<s->avctx->thread_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    ff_init_me(s);

    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
            /* optional cheap pre-pass over the next non-b picture */
            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    }else /* if(s->pict_type == I_TYPE) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
        
        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
        }
    }
    /* collect the ME statistics from all slice threads into the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* scene change: encode as an I frame instead of the planned P frame */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* pick the smallest f_code/b_code that can represent the found MVs,
       then clip/repair vectors that still fall outside the coded range */
    if(!s->umvplus){
        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
            }
                    
            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j, 
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==B_TYPE){
            int a, b;

            /* f_code covers forward MVs, b_code covers backward MVs */
            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) 
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, 
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    if (!s->fixed_qscale) 
        s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr

    if(s->adaptive_quant){
        /* smooth per-MB qscale changes within codec-specific limits */
        switch(s->codec_id){
        case CODEC_ID_MPEG4:
            ff_clean_mpeg4_qscales(s);
            break;
        case CODEC_ID_H263:
        case CODEC_ID_H263P:
        case CODEC_ID_FLV1:
            ff_clean_h263_qscales(s);
            break;
        }

        s->lambda= s->lambda_table[0];
        //FIXME broken
    }else
        s->lambda= s->current_picture.quality;
//printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
    update_qscale(s);
    
    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) 
        s->qscale= 3; //reduce clipping problems
        
    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3) & 0xFF;
        }
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }
    
    //FIXME var duplication
    s->current_picture_ptr->key_frame=
    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
    s->current_picture_ptr->pict_type=
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    /* write the per-format picture header; header_bits is measured below */
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        mjpeg_picture_header(s);
        break;
#ifdef CONFIG_H261_ENCODER
    case FMT_H261:
        ff_h261_encode_picture_header(s, picture_number);
        break;
#endif
    case FMT_H263:
        if (s->codec_id == CODEC_ID_WMV2) 
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (s->h263_msmpeg4) 
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
#ifdef CONFIG_RV10_ENCODER
        else if (s->codec_id == CODEC_ID_RV10) 
            rv10_encode_picture_header(s, picture_number);
#endif
#ifdef CONFIG_RV20_ENCODER
        else if (s->codec_id == CODEC_ID_RV20) 
            rv20_encode_picture_header(s, picture_number);
#endif
        else if (s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;
        
    /* run the actual macroblock encoding on all slice threads, then merge
       the statistics and bitstreams back into the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
}
5464
5465 #endif //CONFIG_ENCODERS
5466
5467 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5468     const int intra= s->mb_intra;
5469     int i;
5470
5471     s->dct_count[intra]++;
5472
5473     for(i=0; i<64; i++){
5474         int level= block[i];
5475
5476         if(level){
5477             if(level>0){
5478                 s->dct_error_sum[intra][i] += level;
5479                 level -= s->dct_offset[intra][i];
5480                 if(level<0) level=0;
5481             }else{
5482                 s->dct_error_sum[intra][i] -= level;
5483                 level += s->dct_offset[intra][i];
5484                 if(level>0) level=0;
5485             }
5486             block[i]= level;
5487         }
5488     }
5489 }
5490
5491 #ifdef CONFIG_ENCODERS
5492
/**
 * Quantize one 8x8 DCT block using trellis rate-distortion optimization:
 * for each scan position a small set of candidate quantized levels is
 * scored as distortion + lambda * vlc_bits, and the cheapest run/level
 * path through the block is selected via dynamic programming.
 *
 * @param s        encoder context (provides matrices, VLC length tables, fdct)
 * @param block    DCT input; overwritten with the chosen quantized levels
 *                 in permuted-scan order
 * @param n        block index (<4 = luma, used for DC scale selection)
 * @param qscale   quantizer scale
 * @param overflow set to nonzero if a level exceeded s->max_qcoeff
 * @return index of the last non-zero coefficient, or -1 / 0 when the block
 *         quantizes to (almost) nothing
 */
static int dct_quantize_trellis_c(MpegEncContext *s, 
                        DCTELEM *block, int n,
                        int qscale, int *overflow){
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    int run_tab[65];      // best predecessor run ending at each position
    int level_tab[65];    // best level chosen at each position
    int score_tab[65];    // best accumulated score up to each position
    int survivor[65];     // positions still worth extending a path from
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;
    int last_i;
    int coeff[2][64];     // up to 2 candidate levels per coefficient
    int coeff_count[64];
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
        
    s->dsp.fdct (block);
    
    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    /* intra blocks quantize DC separately and start the trellis at i=1 */
    if (s->mb_intra) {
        int q;
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }
            
        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    /* scan backward for the last coefficient that survives quantization */
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    /* build the candidate level lists: the quantized level and level-1
       (toward zero); sub-threshold coefficients get a +/-1 candidate only */
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            coeff_count[i]= FFMIN(level, 2);
            assert(coeff_count[i]);
            max |=level;
        }else{
            coeff[0][i]= (level>>31)|1;  // sign(level): -1 or +1 candidate
            coeff_count[i]= 1;
        }
    }
    
    *overflow= s->max_qcoeff < max; //overflow might have happened
    
    /* all coefficients quantized to zero */
    if(last_non_zero < start_i){
        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
        return last_non_zero;
    }

    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;
    
    /* dynamic programming over scan positions: extend every surviving
       path by each candidate level at position i */
    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j;
        const int dct_coeff= ABS(block[ scantable[i] ]);
        const int zero_distoration= dct_coeff*dct_coeff;
        int best_score=256*256*256*120;
        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distoration;
            int level= coeff[level_index][i];
            const int alevel= ABS(level);
            int unquant_coeff;
            
            assert(level);

            /* reconstruct what the decoder would see for this level */
            if(s->out_format == FMT_H263){
                unquant_coeff= alevel*qmul + qadd;
            }else{ //MPEG1
                j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
            level+=64;
            if((level&(~127)) == 0){
                /* level fits the VLC tables: score against each survivor */
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];
                    
                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                /* H.263 uses a distinct table for the last coefficient */
                if(s->out_format == FMT_H263){
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                /* level needs the escape code: fixed esc_length bit cost */
                distoration += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + score_tab[i-run];
                    
                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }
        
        score_tab[i+1]= best_score;

        /* prune survivors that can no longer beat the best path */
        //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    /* non-H.263 formats: pick the best truncation point explicitly */
    if(s->out_format != FMT_H263){
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;
    
    dc= ABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
    
    if(last_non_zero < start_i)
        return last_non_zero;

    /* special case: only position 0 coded (inter block) — re-decide whether
       keeping a single coefficient beats coding nothing at all */
    if(last_non_zero == 0 && start_i == 0){
        int best_level= 0;
        int best_score= dc * dc;
        
        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= ABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    /* backtrack the winning path, writing levels into the block in
       permuted-scan order */
    i= last_i;
    assert(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;
    
    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
5780
5781 //#define REFINE_STATS 1
5782 static int16_t basis[64][64];
5783
5784 static void build_basis(uint8_t *perm){
5785     int i, j, x, y;
5786     emms_c();
5787     for(i=0; i<8; i++){
5788         for(j=0; j<8; j++){
5789             for(y=0; y<8; y++){
5790                 for(x=0; x<8; x++){
5791                     double s= 0.25*(1<<BASIS_SHIFT);
5792                     int index= 8*i + j;
5793                     int perm_index= perm[index];
5794                     if(i==0) s*= sqrt(0.5);
5795                     if(j==0) s*= sqrt(0.5);
5796                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5797                 }
5798             }
5799         }
5800     }
5801 }
5802
5803 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5804                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5805                         int n, int qscale){
5806     int16_t rem[64];
5807     DCTELEM d1[64] __align16;
5808     const int *qmat;
5809     const uint8_t *scantable= s->intra_scantable.scantable;
5810     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5811 //    unsigned int threshold1, threshold2;
5812 //    int bias=0;
5813     int run_tab[65];
5814     int prev_run=0;
5815     int prev_level=0;
5816     int qmul, qadd, start_i, last_non_zero, i, dc;
5817     uint8_t * length;
5818     uint8_t * last_length;
5819     int lambda;
5820     int rle_index, run, q, sum;
5821 #ifdef REFINE_STATS
5822 static int count=0;
5823 static int after_last=0;
5824 static int to_zero=0;
5825 static int from_zero=0;
5826 static int raise=0;
5827 static int lower=0;
5828 static int messed_sign=0;
5829 #endif
5830
5831     if(basis[0][0] == 0)
5832         build_basis(s->dsp.idct_permutation);
5833     
5834     qmul= qscale*2;
5835     qadd= (qscale-1)|1;
5836     if (s->mb_intra) {
5837         if (!s->h263_aic) {
5838             if (n < 4)
5839                 q = s->y_dc_scale;
5840             else
5841                 q = s->c_dc_scale;
5842         } else{
5843             /* For AIC we skip quant/dequant of INTRADC */
5844             q = 1;
5845             qadd=0;
5846         }
5847         q <<= RECON_SHIFT-3;
5848         /* note: block[0] is assumed to be positive */
5849         dc= block[0]*q;
5850 //        block[0] = (block[0] + (q >> 1)) / q;
5851         start_i = 1;
5852         qmat = s->q_intra_matrix[qscale];
5853 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5854 //            bias= 1<<(QMAT_SHIFT-1);
5855         length     = s->intra_ac_vlc_length;
5856         last_length= s->intra_ac_vlc_last_length;
5857     } else {
5858         dc= 0;
5859         start_i = 0;
5860         qmat = s->q_inter_matrix[qscale];
5861         length     = s->inter_ac_vlc_length;
5862         last_length= s->inter_ac_vlc_last_length;
5863     }
5864     last_non_zero = s->block_last_index[n];
5865
5866 #ifdef REFINE_STATS
5867 {START_TIMER
5868 #endif
5869     dc += (1<<(RECON_SHIFT-1));
5870     for(i=0; i<64; i++){
5871         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
5872     }
5873 #ifdef REFINE_STATS
5874 STOP_TIMER("memset rem[]")}
5875 #endif
5876     sum=0;
5877     for(i=0; i<64; i++){
5878         int one= 36;
5879         int qns=4;
5880         int w;
5881
5882         w= ABS(weight[i]) + qns*one;
5883         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
5884
5885         weight[i] = w;
5886 //        w=weight[i] = (63*qns + (w/2)) / w;
5887          
5888         assert(w>0);
5889         assert(w<(1<<6));
5890         sum += w*w;
5891     }
5892     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
5893 #ifdef REFINE_STATS
5894 {START_TIMER
5895 #endif
5896     run=0;
5897     rle_index=0;
5898     for(i=start_i; i<=last_non_zero; i++){
5899         int j= perm_scantable[i];
5900         const int level= block[j];
5901         int coeff;
5902         
5903         if(level){
5904             if(level<0) coeff= qmul*level - qadd;
5905             else        coeff= qmul*level + qadd;
5906             run_tab[rle_index++]=run;
5907             run=0;
5908
5909             s->dsp.add_8x8basis(rem, basis[j], coeff);
5910         }else{
5911             run++;
5912         }
5913     }
5914 #ifdef REFINE_STATS
5915 if(last_non_zero>0){
5916 STOP_TIMER("init rem[]")
5917 }
5918 }
5919
5920 {START_TIMER
5921 #endif
5922     for(;;){
5923         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
5924         int best_coeff=0;
5925         int best_change=0;
5926         int run2, best_unquant_change=0, analyze_gradient;
5927 #ifdef REFINE_STATS
5928 {START_TIMER
5929 #endif
5930         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
5931
5932         if(analyze_gradient){
5933 #ifdef REFINE_STATS
5934 {START_TIMER
5935 #endif
5936             for(i=0; i<64; i++){
5937                 int w= weight[i];
5938             
5939                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
5940             }
5941 #ifdef REFINE_STATS
5942 STOP_TIMER("rem*w*w")}
5943 {START_TIMER
5944 #endif
5945             s->dsp.fdct(d1);
5946 #ifdef REFINE_STATS
5947 STOP_TIMER("dct")}
5948 #endif
5949         }
5950
5951         if(start_i){
5952             const int level= block[0];
5953             int change, old_coeff;
5954
5955             assert(s->mb_intra);
5956             
5957             old_coeff= q*level;
5958             
5959             for(change=-1; change<=1; change+=2){
5960                 int new_level= level + change;
5961                 int score, new_coeff;
5962                 
5963                 new_coeff= q*new_level;
5964                 if(new_coeff >= 2048 || new_coeff < 0)
5965                     continue;
5966
5967                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
5968                 if(score<best_score){
5969                     best_score= score;
5970                     best_coeff= 0;
5971                     best_change= change;
5972                     best_unquant_change= new_coeff - old_coeff;
5973                 }
5974             }
5975         }
5976         
5977         run=0;
5978         rle_index=0;
5979         run2= run_tab[rle_index++];
5980         prev_level=0;
5981         prev_run=0;
5982
5983         for(i=start_i; i<64; i++){
5984             int j= perm_scantable[i];
5985             const int level= block[j];
5986             int change, old_coeff;
5987
5988             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
5989                 break;
5990
5991             if(level){
5992                 if(level<0) old_coeff= qmul*level - qadd;
5993                 else        old_coeff= qmul*level + qadd;
5994                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
5995             }else{
5996                 old_coeff=0;
5997                 run2--;
5998                 assert(run2>=0 || i >= last_non_zero );
5999             }
6000             
6001             for(change=-1; change<=1; change+=2){
6002                 int new_level= level + change;
6003                 int score, new_coeff, unquant_change;
6004                 
6005                 score=0;
6006                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
6007                    continue;
6008
6009                 if(new_level){
6010                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6011                     else            new_coeff= qmul*new_level + qadd;
6012                     if(new_coeff >= 2048 || new_coeff <= -2048)
6013                         continue;
6014                     //FIXME check for overflow
6015                     
6016                     if(level){
6017                         if(level < 63 && level > -63){
6018                             if(i < last_non_zero)
6019                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6020                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6021                             else
6022                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6023                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6024                         }
6025                     }else{
6026                         assert(ABS(new_level)==1);
6027                         
6028                         if(analyze_gradient){
6029                             int g= d1[ scantable[i] ];
6030                             if(g && (g^new_level) >= 0)
6031                                 continue;
6032                         }
6033
6034                         if(i < last_non_zero){
6035                             int next_i= i + run2 + 1;
6036                             int next_level= block[ perm_scantable[next_i] ] + 64;
6037                             
6038                             if(next_level&(~127))
6039                                 next_level= 0;
6040
6041                             if(next_i < last_non_zero)
6042                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6043                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6044                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6045                             else
6046                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6047                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6048                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6049                         }else{
6050                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6051                             if(prev_level){
6052                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6053                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6054                             }
6055                         }
6056                     }
6057                 }else{
6058                     new_coeff=0;
6059                     assert(ABS(level)==1);
6060
6061                     if(i < last_non_zero){
6062                         int next_i= i + run2 + 1;
6063                         int next_level= block[ perm_scantable[next_i] ] + 64;
6064                             
6065                         if(next_level&(~127))
6066                             next_level= 0;
6067
6068                         if(next_i < last_non_zero)
6069                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6070                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6071                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6072                         else
6073                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6074                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6075                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6076                     }else{
6077                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6078                         if(prev_level){
6079                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6080                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6081                         }
6082                     }
6083                 }
6084                 
6085                 score *= lambda;
6086
6087                 unquant_change= new_coeff - old_coeff;
6088                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6089                 
6090                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6091                 if(score<best_score){
6092                     best_score= score;
6093                     best_coeff= i;
6094                     best_change= change;
6095                     best_unquant_change= unquant_change;
6096                 }
6097             }
6098             if(level){
6099                 prev_level= level + 64;
6100                 if(prev_level&(~127))
6101                     prev_level= 0;
6102                 prev_run= run;
6103                 run=0;
6104             }else{
6105                 run++;
6106             }
6107         }
6108 #ifdef REFINE_STATS
6109 STOP_TIMER("iterative step")}
6110 #endif
6111
6112         if(best_change){
6113             int j= perm_scantable[ best_coeff ];
6114             
6115             block[j] += best_change;
6116             
6117             if(best_coeff > last_non_zero){
6118                 last_non_zero= best_coeff;
6119                 assert(block[j]);
6120 #ifdef REFINE_STATS
6121 after_last++;
6122 #endif
6123             }else{
6124 #ifdef REFINE_STATS
6125 if(block[j]){
6126     if(block[j] - best_change){
6127         if(ABS(block[j]) > ABS(block[j] - best_change)){
6128             raise++;
6129         }else{
6130             lower++;
6131         }
6132     }else{
6133         from_zero++;
6134     }
6135 }else{
6136     to_zero++;
6137 }
6138 #endif
6139                 for(; last_non_zero>=start_i; last_non_zero--){
6140                     if(block[perm_scantable[last_non_zero]])
6141                         break;
6142                 }
6143             }
6144 #ifdef REFINE_STATS
6145 count++;
6146 if(256*256*256*64 % count == 0){
6147     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6148 }
6149 #endif
6150             run=0;
6151             rle_index=0;
6152             for(i=start_i; i<=last_non_zero; i++){
6153                 int j= perm_scantable[i];
6154                 const int level= block[j];
6155         
6156                  if(level){
6157                      run_tab[rle_index++]=run;
6158                      run=0;
6159                  }else{
6160                      run++;
6161                  }
6162             }
6163             
6164             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6165         }else{
6166             break;
6167         }
6168     }
6169 #ifdef REFINE_STATS
6170 if(last_non_zero>0){
6171 STOP_TIMER("iterative search")
6172 }
6173 }
6174 #endif
6175
6176     return last_non_zero;
6177 }
6178
6179 static int dct_quantize_c(MpegEncContext *s, 
6180                         DCTELEM *block, int n,
6181                         int qscale, int *overflow)
6182 {
6183     int i, j, level, last_non_zero, q, start_i;
6184     const int *qmat;
6185     const uint8_t *scantable= s->intra_scantable.scantable;
6186     int bias;
6187     int max=0;
6188     unsigned int threshold1, threshold2;
6189
6190     s->dsp.fdct (block);
6191
6192     if(s->dct_error_sum)
6193         s->denoise_dct(s, block);
6194
6195     if (s->mb_intra) {
6196         if (!s->h263_aic) {
6197             if (n < 4)
6198                 q = s->y_dc_scale;
6199             else
6200                 q = s->c_dc_scale;
6201             q = q << 3;
6202         } else
6203             /* For AIC we skip quant/dequant of INTRADC */
6204             q = 1 << 3;
6205             
6206         /* note: block[0] is assumed to be positive */
6207         block[0] = (block[0] + (q >> 1)) / q;
6208         start_i = 1;
6209         last_non_zero = 0;
6210         qmat = s->q_intra_matrix[qscale];
6211         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6212     } else {
6213         start_i = 0;
6214         last_non_zero = -1;
6215         qmat = s->q_inter_matrix[qscale];
6216         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6217     }
6218     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6219     threshold2= (threshold1<<1);
6220     for(i=63;i>=start_i;i--) {
6221         j = scantable[i];
6222         level = block[j] * qmat[j];
6223
6224         if(((unsigned)(level+threshold1))>threshold2){
6225             last_non_zero = i;
6226             break;
6227         }else{
6228             block[j]=0;
6229         }
6230     }
6231     for(i=start_i; i<=last_non_zero; i++) {
6232         j = scantable[i];
6233         level = block[j] * qmat[j];
6234
6235 //        if(   bias+level >= (1<<QMAT_SHIFT)
6236 //           || bias-level >= (1<<QMAT_SHIFT)){
6237         if(((unsigned)(level+threshold1))>threshold2){
6238             if(level>0){
6239                 level= (bias + level)>>QMAT_SHIFT;
6240                 block[j]= level;
6241             }else{
6242                 level= (bias - level)>>QMAT_SHIFT;
6243                 block[j]= -level;
6244             }
6245             max |=level;
6246         }else{
6247             block[j]=0;
6248         }
6249     }
6250     *overflow= s->max_qcoeff < max; //overflow might have happened
6251     
6252     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
6253     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6254         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6255
6256     return last_non_zero;
6257 }
6258
6259 #endif //CONFIG_ENCODERS
6260
6261 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
6262                                    DCTELEM *block, int n, int qscale)
6263 {
6264     int i, level, nCoeffs;
6265     const uint16_t *quant_matrix;
6266
6267     nCoeffs= s->block_last_index[n];
6268     
6269     if (n < 4) 
6270         block[0] = block[0] * s->y_dc_scale;
6271     else
6272         block[0] = block[0] * s->c_dc_scale;
6273     /* XXX: only mpeg1 */
6274     quant_matrix = s->intra_matrix;
6275     for(i=1;i<=nCoeffs;i++) {
6276         int j= s->intra_scantable.permutated[i];
6277         level = block[j];
6278         if (level) {
6279             if (level < 0) {
6280                 level = -level;
6281                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6282                 level = (level - 1) | 1;
6283                 level = -level;
6284             } else {
6285                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6286                 level = (level - 1) | 1;
6287             }
6288             block[j] = level;
6289         }
6290     }
6291 }
6292
6293 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
6294                                    DCTELEM *block, int n, int qscale)
6295 {
6296     int i, level, nCoeffs;
6297     const uint16_t *quant_matrix;
6298
6299     nCoeffs= s->block_last_index[n];
6300     
6301     quant_matrix = s->inter_matrix;
6302     for(i=0; i<=nCoeffs; i++) {
6303         int j= s->intra_scantable.permutated[i];
6304         level = block[j];
6305         if (level) {
6306             if (level < 0) {
6307                 level = -level;
6308                 level = (((level << 1) + 1) * qscale *
6309                          ((int) (quant_matrix[j]))) >> 4;
6310                 level = (level - 1) | 1;
6311                 level = -level;
6312             } else {
6313                 level = (((level << 1) + 1) * qscale *
6314                          ((int) (quant_matrix[j]))) >> 4;
6315                 level = (level - 1) | 1;
6316             }
6317             block[j] = level;
6318         }
6319     }
6320 }
6321
6322 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, 
6323                                    DCTELEM *block, int n, int qscale)
6324 {
6325     int i, level, nCoeffs;
6326     const uint16_t *quant_matrix;
6327
6328     if(s->alternate_scan) nCoeffs= 63;
6329     else nCoeffs= s->block_last_index[n];
6330     
6331     if (n < 4) 
6332         block[0] = block[0] * s->y_dc_scale;
6333     else
6334         block[0] = block[0] * s->c_dc_scale;
6335     quant_matrix = s->intra_matrix;
6336     for(i=1;i<=nCoeffs;i++) {
6337         int j= s->intra_scantable.permutated[i];
6338         level = block[j];
6339         if (level) {
6340             if (level < 0) {
6341                 level = -level;
6342                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6343                 level = -level;
6344             } else {
6345                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6346             }
6347             block[j] = level;
6348         }
6349     }
6350 }
6351
6352 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, 
6353                                    DCTELEM *block, int n, int qscale)
6354 {
6355     int i, level, nCoeffs;
6356     const uint16_t *quant_matrix;
6357     int sum=-1;
6358
6359     if(s->alternate_scan) nCoeffs= 63;
6360     else nCoeffs= s->block_last_index[n];
6361     
6362     quant_matrix = s->inter_matrix;
6363     for(i=0; i<=nCoeffs; i++) {
6364         int j= s->intra_scantable.permutated[i];
6365         level = block[j];
6366         if (level) {
6367             if (level < 0) {
6368                 level = -level;
6369                 level = (((level << 1) + 1) * qscale *
6370                          ((int) (quant_matrix[j]))) >> 4;
6371                 level = -level;
6372             } else {
6373                 level = (((level << 1) + 1) * qscale *
6374                          ((int) (quant_matrix[j]))) >> 4;
6375             }
6376             block[j] = level;
6377             sum+=level;
6378         }
6379     }
6380     block[63]^=sum&1;
6381 }
6382
6383 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
6384                                   DCTELEM *block, int n, int qscale)
6385 {
6386     int i, level, qmul, qadd;
6387     int nCoeffs;
6388     
6389     assert(s->block_last_index[n]>=0);
6390     
6391     qmul = qscale << 1;
6392     
6393     if (!s->h263_aic) {
6394         if (n < 4) 
6395             block[0] = block[0] * s->y_dc_scale;
6396         else
6397             block[0] = block[0] * s->c_dc_scale;
6398         qadd = (qscale - 1) | 1;
6399     }else{
6400         qadd = 0;
6401     }
6402     if(s->ac_pred)
6403         nCoeffs=63;
6404     else
6405         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6406
6407     for(i=1; i<=nCoeffs; i++) {
6408         level = block[i];
6409         if (level) {
6410             if (level < 0) {
6411                 level = level * qmul - qadd;
6412             } else {
6413                 level = level * qmul + qadd;
6414             }
6415             block[i] = level;
6416         }
6417     }
6418 }
6419
6420 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
6421                                   DCTELEM *block, int n, int qscale)
6422 {
6423     int i, level, qmul, qadd;
6424     int nCoeffs;
6425     
6426     assert(s->block_last_index[n]>=0);
6427     
6428     qadd = (qscale - 1) | 1;
6429     qmul = qscale << 1;
6430     
6431     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6432
6433     for(i=0; i<=nCoeffs; i++) {
6434         level = block[i];
6435         if (level) {
6436             if (level < 0) {
6437                 level = level * qmul - qadd;
6438             } else {
6439                 level = level * qmul + qadd;
6440             }
6441             block[i] = level;
6442         }
6443     }
6444 }
6445
#ifdef CONFIG_ENCODERS

/* Encoder registrations. All of these codecs share the generic MPV_*
 * entry points (init/encode/end); the CODEC_ID_* field selects the
 * bitstream syntax inside MpegEncContext. Each advertises YUV420P as
 * its only supported pixel format (MJPEG uses full-range YUVJ420P). */

AVCodec h263_encoder = {
    "h263",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec h263p_encoder = {
    "h263p",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263P,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec flv_encoder = {
    "flv",
    CODEC_TYPE_VIDEO,
    CODEC_ID_FLV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec rv10_encoder = {
    "rv10",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV10,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec rv20_encoder = {
    "rv20",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV20,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* MPEG-4 is the only one here that may emit delayed frames (B-frames),
 * hence CODEC_CAP_DELAY. */
AVCodec mpeg4_encoder = {
    "mpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG4,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
    .capabilities= CODEC_CAP_DELAY,
};

AVCodec msmpeg4v1_encoder = {
    "msmpeg4v1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec msmpeg4v2_encoder = {
    "msmpeg4v2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* note: the v3 variant is registered under the short name "msmpeg4" */
AVCodec msmpeg4v3_encoder = {
    "msmpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V3,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec wmv1_encoder = {
    "wmv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec mjpeg_encoder = {
    "mjpeg",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MJPEG,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
};

#endif //CONFIG_ENCODERS