/*
 * Source: libavcodec/mpegvideo.c (FFmpeg)
 * Commit subject: support downscaling frames for dynamic b frame decision
 */
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */
27
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
55                                   DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57 #ifdef CONFIG_ENCODERS
58 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63 #endif //CONFIG_ENCODERS
64
65 #ifdef HAVE_XVMC
66 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67 extern void XVMC_field_end(MpegEncContext *s);
68 extern void XVMC_decode_mb(MpegEncContext *s);
69 #endif
70
/* Global edge-drawing hook; defaults to the portable C implementation.
   NOTE(review): presumably replaced by arch-specific code elsewhere — confirm. */
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72
73
74 /* enable all paranoid tests for rounding, overflows, etc... */
75 //#define PARANOID
76
77 //#define DEBUG
78
79
/* for jpeg fast DCT */
#define CONST_BITS 14

/* AAN DCT post-scale factors, used by convert_matrix() to fold the
   ifast/faandct output scaling into the quantization tables. */
static const uint16_t aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};

/* Rounding table for h263-style chroma motion vectors (index = value mod 16). */
static const uint8_t h263_chroma_roundtab[16] = {
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};

/* Identity luma->chroma qscale mapping; installed by MPV_common_defaults(). */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
104
105 #ifdef CONFIG_ENCODERS
static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL; // lazily allocated in MPV_encode_defaults()
static uint8_t default_fcode_tab[MAX_MV*2+1];          // filled in MPV_encode_defaults()

enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1}; // -1 terminates the list
110
/**
 * Builds fixed-point quantization multiplier tables for every qscale in
 * [qmin, qmax] from the given quantization matrix.
 * qmat   gets 32-bit reciprocal factors (QMAT_SHIFT fixed point);
 * qmat16 gets 16-bit factor/bias pairs (QMAT_SHIFT_MMX fixed point).
 * Which table layout is produced depends on the fdct in use: ifast (and
 * faandct without FAAN_POSTSCALE) leaves an AAN post-scale in its output
 * that must be divided out here via aanscales[].
 * Entries are stored at the idct_permutation of each coefficient index.
 */
static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                           const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
{
    int qscale;
    int shift=0; // extra shift needed if (max_coeff * qmat) could exceed INT_MAX

    for(qscale=qmin; qscale<=qmax; qscale++){
        int i;
        if (dsp->fdct == ff_jpeg_fdct_islow
#ifdef FAAN_POSTSCALE
            || dsp->fdct == ff_faandct
#endif
            ) {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
                                (qscale * quant_matrix[j]));
            }
        } else if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
            /* ifast output carries the AAN scale, so fold aanscales[] into
               the divisor (and compensate with an extra 14-bit shift) */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
                                (aanscales[i] * qscale * quant_matrix[j]));
            }
        } else {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                   So 16           <= qscale * quant_matrix[i]             <= 7905
                   so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
                   so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
                */
                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
//                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);

                /* avoid 0 (division by zero downstream) and the unrepresentable 2^15 */
                if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
                qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
            }
        }

        /* determine how much headroom is missing; intra skips the DC coefficient */
        for(i=intra; i<64; i++){
            int64_t max= 8191;
            if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
                max= (8191LL*aanscales[i]) >> 14;
            }
            while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
                shift++;
            }
        }
    }
    if(shift){
        av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
    }
}
184
185 static inline void update_qscale(MpegEncContext *s){
186     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
187     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
188
189     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
190 }
191 #endif //CONFIG_ENCODERS
192
193 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
194     int i;
195     int end;
196
197     st->scantable= src_scantable;
198
199     for(i=0; i<64; i++){
200         int j;
201         j = src_scantable[i];
202         st->permutated[i] = permutation[j];
203 #ifdef ARCH_POWERPC
204         st->inverse[j] = i;
205 #endif
206     }
207
208     end=-1;
209     for(i=0; i<64; i++){
210         int j;
211         j = st->permutated[i];
212         if(j>end) end=j;
213         st->raster_end[i]= end;
214     }
215 }
216
217 #ifdef CONFIG_ENCODERS
218 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
219     int i;
220
221     if(matrix){
222         put_bits(pb, 1, 1);
223         for(i=0;i<64;i++) {
224             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
225         }
226     }else
227         put_bits(pb, 1, 0);
228 }
229 #endif //CONFIG_ENCODERS
230
/* init common dct for both encoder and decoder */
int DCT_common_init(MpegEncContext *s)
{
    /* install the portable C unquantizers first; the arch-specific init
       calls below may overwrite them with optimized versions */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS

    /* platform-specific overrides (compiled in at most a few of these) */
#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* keep the (possibly arch-optimized) quantizer as the fast variant
       before optionally switching the main one to trellis */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
289
290 static void copy_picture(Picture *dst, Picture *src){
291     *dst = *src;
292     dst->type= FF_BUFFER_TYPE_COPY;
293 }
294
/**
 * Copies per-frame metadata (not pixel data) from src to dst.
 * When s->avctx->me_threshold is set, the per-macroblock data
 * (mb_type, motion_val, ref_index) is copied as well; missing source
 * tables or a motion_subsample_log2 mismatch are reported via av_log.
 */
static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
    int i;

    dst->pict_type              = src->pict_type;
    dst->quality                = src->quality;
    dst->coded_picture_number   = src->coded_picture_number;
    dst->display_picture_number = src->display_picture_number;
//    dst->reference              = src->reference;
    dst->pts                    = src->pts;
    dst->interlaced_frame       = src->interlaced_frame;
    dst->top_field_first        = src->top_field_first;

    if(s->avctx->me_threshold){
        if(!src->motion_val[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
        if(!src->mb_type)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
        if(!src->ref_index[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
        if(src->motion_subsample_log2 != dst->motion_subsample_log2)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
            src->motion_subsample_log2, dst->motion_subsample_log2);

        memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));

        /* copy per-direction motion vectors and reference indices */
        for(i=0; i<2; i++){
            int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
            int height= ((16*s->mb_height)>>src->motion_subsample_log2);

            if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
                memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
            }
            if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
                memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
            }
        }
    }
}
333
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 * Side tables (qscale, mb_type, motion vectors, ...) are allocated on
 * first use (qscale_table==NULL) only.
 * @return 0 on success, -1 on failure (via the CHECKED_ALLOCZ fail label)
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;

    if(shared){
        /* caller provides the pixel buffers; just tag the picture */
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* strides must stay constant for the lifetime of the context */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* allocate the side tables once (qscale_table doubles as the "done" flag) */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* h264 uses 4x4 motion granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* 8x8 motion granularity otherwise */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
418
419 /**
420  * deallocates a picture
421  */
422 static void free_picture(MpegEncContext *s, Picture *pic){
423     int i;
424
425     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
426         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
427     }
428
429     av_freep(&pic->mb_var);
430     av_freep(&pic->mc_mb_var);
431     av_freep(&pic->mb_mean);
432     av_freep(&pic->mbskip_table);
433     av_freep(&pic->qscale_table);
434     av_freep(&pic->mb_type_base);
435     av_freep(&pic->dct_coeff);
436     av_freep(&pic->pan_scan);
437     pic->mb_type= NULL;
438     for(i=0; i<2; i++){
439         av_freep(&pic->motion_val_base[i]);
440         av_freep(&pic->ref_index[i]);
441     }
442
443     if(pic->type == FF_BUFFER_TYPE_SHARED){
444         for(i=0; i<4; i++){
445             pic->base[i]=
446             pic->data[i]= NULL;
447         }
448         pic->type= 0;
449     }
450 }
451
/**
 * Allocates the per-thread scratch buffers of a (possibly duplicated)
 * MpegEncContext: edge emulation buffer, ME scratchpads, ME maps,
 * optional noise-reduction accumulator and the DCT block array.
 * @return 0 on success, -1 on allocation failure (cleanup is deferred
 *         to MPV_common_end())
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the three scratchpads alias the same allocation */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    /* pblocks point into this context's own block array */
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
481
482 static void free_duplicate_context(MpegEncContext *s){
483     if(s==NULL) return;
484
485     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
486     av_freep(&s->me.scratchpad);
487     s->rd_scratchpad=
488     s->b_scratchpad=
489     s->obmc_scratchpad= NULL;
490
491     av_freep(&s->dct_error_sum);
492     av_freep(&s->me.map);
493     av_freep(&s->me.score_map);
494     av_freep(&s->blocks);
495     s->block= NULL;
496 }
497
498 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
499 #define COPY(a) bak->a= src->a
500     COPY(allocated_edge_emu_buffer);
501     COPY(edge_emu_buffer);
502     COPY(me.scratchpad);
503     COPY(rd_scratchpad);
504     COPY(b_scratchpad);
505     COPY(obmc_scratchpad);
506     COPY(me.map);
507     COPY(me.score_map);
508     COPY(blocks);
509     COPY(block);
510     COPY(start_mb_y);
511     COPY(end_mb_y);
512     COPY(me.map_generation);
513     COPY(pb);
514     COPY(dct_error_sum);
515     COPY(dct_count[0]);
516     COPY(dct_count[1]);
517 #undef COPY
518 }
519
520 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
521     MpegEncContext bak;
522     int i;
523     //FIXME copy only needed parts
524 //START_TIMER
525     backup_duplicate_context(&bak, dst);
526     memcpy(dst, src, sizeof(MpegEncContext));
527     backup_duplicate_context(dst, &bak);
528     for(i=0;i<12;i++){
529         dst->pblocks[i] = (short *)(&dst->block[i]);
530     }
531 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
532 }
533
534 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
535 #define COPY(a) dst->a= src->a
536     COPY(pict_type);
537     COPY(current_picture);
538     COPY(f_code);
539     COPY(b_code);
540     COPY(qscale);
541     COPY(lambda);
542     COPY(lambda2);
543     COPY(picture_in_gop_number);
544     COPY(gop_picture_number);
545     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
546     COPY(progressive_frame); //FIXME don't set in encode_header
547     COPY(partitioned_frame); //FIXME don't set in encode_header
548 #undef COPY
549 }
550
551 /**
552  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
553  * the changed fields will not depend upon the prior state of the MpegEncContext.
554  */
555 static void MPV_common_defaults(MpegEncContext *s){
556     s->y_dc_scale_table=
557     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
558     s->chroma_qscale_table= ff_default_chroma_qscale_table;
559     s->progressive_frame= 1;
560     s->progressive_sequence= 1;
561     s->picture_structure= PICT_FRAME;
562
563     s->coded_picture_number = 0;
564     s->picture_number = 0;
565     s->input_picture_number = 0;
566
567     s->picture_in_gop_number = 0;
568
569     s->f_code = 1;
570     s->b_code = 1;
571 }
572
573 /**
574  * sets the given MpegEncContext to defaults for decoding.
575  * the changed fields will not depend upon the prior state of the MpegEncContext.
576  */
577 void MPV_decode_defaults(MpegEncContext *s){
578     MPV_common_defaults(s);
579 }
580
581 /**
582  * sets the given MpegEncContext to defaults for encoding.
583  * the changed fields will not depend upon the prior state of the MpegEncContext.
584  */
585
586 #ifdef CONFIG_ENCODERS
587 static void MPV_encode_defaults(MpegEncContext *s){
588     static int done=0;
589
590     MPV_common_defaults(s);
591
592     if(!done){
593         int i;
594         done=1;
595
596         default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
597         memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
598
599         for(i=-16; i<16; i++){
600             default_fcode_tab[i + MAX_MV]= 1;
601         }
602     }
603     s->me.mv_penalty= default_mv_penalty;
604     s->fcode_tab= default_fcode_tab;
605 }
606 #endif //CONFIG_ENCODERS
607
/**
 * init common structure for both encoder and decoder.
 * this assumes that some variables like width/height are already set.
 * Allocates all geometry-dependent tables and the per-thread contexts.
 * @return 0 on success, -1 on failure (partial allocations freed via
 *         MPV_common_end())
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;

    /* each thread needs at least 16 pixels (one MB row) of height */
    if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    DCT_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* macroblock geometry; strides are one entry wider than the count */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_height = (s->height + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overridden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    /* blocks 0-3 are luma (8x8 grid), 4-5 are chroma (MB grid) */
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        /* public pointers skip the first row+1 entries of each base table */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding

        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))

        /* quantization tables for all 32 qscales (see convert_matrix()) */
        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))

        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);

    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread 0 is the main context itself; the rest are memcpy clones */
    s->thread_context[0]= s;
    for(i=1; i<s->avctx->thread_count; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    /* give each thread its own scratch buffers and an MB-row range */
    for(i=0; i<s->avctx->thread_count; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
799
/* free everything allocated by MPV_common_init() (common to encoder and
   decoder); safe on a partially initialized context, as it is also used
   as the failure path of MPV_common_init() */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* free the per-thread duplicate contexts; slot 0 is s itself, so only
       its duplicated payload is released here... */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    /* ...and only slots 1..n-1 were av_malloc()ed and need freeing */
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* motion-vector tables: free the *_base pointers (which include the
       border row) and clear the derived offset pointers */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* field-mode MV tables, indexed [direction][field][...] */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    /* prediction/coded-block state (H.263-family and error resilience) */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* release every picture's buffers before freeing the array itself */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    /* drop dangling references so a later init starts from a clean slate */
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
883
884 #ifdef CONFIG_ENCODERS
885
886 /* init video encoder */
887 int MPV_encode_init(AVCodecContext *avctx)
888 {
889     MpegEncContext *s = avctx->priv_data;
890     int i;
891     int chroma_h_shift, chroma_v_shift;
892
893     MPV_encode_defaults(s);
894
895     if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
896         av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
897         return -1;
898     }
899
900     if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
901         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){
902             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
903             return -1;
904         }
905     }else{
906         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){
907             av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
908             return -1;
909         }
910     }
911
912     s->bit_rate = avctx->bit_rate;
913     s->width = avctx->width;
914     s->height = avctx->height;
915     if(avctx->gop_size > 600){
916         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
917         avctx->gop_size=600;
918     }
919     s->gop_size = avctx->gop_size;
920     s->avctx = avctx;
921     s->flags= avctx->flags;
922     s->flags2= avctx->flags2;
923     s->max_b_frames= avctx->max_b_frames;
924     s->codec_id= avctx->codec->id;
925     s->luma_elim_threshold  = avctx->luma_elim_threshold;
926     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
927     s->strict_std_compliance= avctx->strict_std_compliance;
928     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
929     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
930     s->mpeg_quant= avctx->mpeg_quant;
931     s->rtp_mode= !!avctx->rtp_payload_size;
932     s->intra_dc_precision= avctx->intra_dc_precision;
933     s->user_specified_pts = AV_NOPTS_VALUE;
934
935     if (s->gop_size <= 1) {
936         s->intra_only = 1;
937         s->gop_size = 12;
938     } else {
939         s->intra_only = 0;
940     }
941
942     s->me_method = avctx->me_method;
943
944     /* Fixed QSCALE */
945     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
946
947     s->adaptive_quant= (   s->avctx->lumi_masking
948                         || s->avctx->dark_masking
949                         || s->avctx->temporal_cplx_masking
950                         || s->avctx->spatial_cplx_masking
951                         || s->avctx->p_masking
952                         || s->avctx->border_masking
953                         || (s->flags&CODEC_FLAG_QP_RD))
954                        && !s->fixed_qscale;
955
956     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
957     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
958     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
959
960     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
961         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
962         return -1;
963     }
964
965     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
966         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
967     }
968
969     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
970         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
971         return -1;
972     }
973
974     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
975         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
976         return -1;
977     }
978
979     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
980        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
981        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
982
983         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
984     }
985
986     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
987        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
988         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
989         return -1;
990     }
991
992     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
993         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
994         return -1;
995     }
996
997     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
998         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
999         return -1;
1000     }
1001
1002     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1003         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1004         return -1;
1005     }
1006
1007     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1008         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1009         return -1;
1010     }
1011
1012     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1013         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1014         return -1;
1015     }
1016
1017     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1018        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1019         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1020         return -1;
1021     }
1022
1023     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1024         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1025         return -1;
1026     }
1027
1028     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1029         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1030         return -1;
1031     }
1032
1033     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1034         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1035         return -1;
1036     }
1037
1038     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1039         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1040         return -1;
1041     }
1042
1043     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1044        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1045        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1046         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1047         return -1;
1048     }
1049
1050     if(s->avctx->thread_count > 1)
1051         s->rtp_mode= 1;
1052
1053     if(!avctx->time_base.den || !avctx->time_base.num){
1054         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1055         return -1;
1056     }
1057
1058     i= (INT_MAX/2+128)>>8;
1059     if(avctx->me_threshold >= i){
1060         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1061         return -1;
1062     }
1063     if(avctx->mb_threshold >= i){
1064         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1065         return -1;
1066     }
1067
1068     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1069         av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass");
1070         return -1;
1071     }
1072
1073     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1074     if(i > 1){
1075         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1076         avctx->time_base.den /= i;
1077         avctx->time_base.num /= i;
1078 //        return -1;
1079     }
1080
1081     if(s->codec_id==CODEC_ID_MJPEG){
1082         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1083         s->inter_quant_bias= 0;
1084     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1085         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1086         s->inter_quant_bias= 0;
1087     }else{
1088         s->intra_quant_bias=0;
1089         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1090     }
1091
1092     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1093         s->intra_quant_bias= avctx->intra_quant_bias;
1094     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1095         s->inter_quant_bias= avctx->inter_quant_bias;
1096
1097     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1098
1099     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1100         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1101         return -1;
1102     }
1103     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1104
1105     switch(avctx->codec->id) {
1106     case CODEC_ID_MPEG1VIDEO:
1107         s->out_format = FMT_MPEG1;
1108         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1109         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1110         break;
1111     case CODEC_ID_MPEG2VIDEO:
1112         s->out_format = FMT_MPEG1;
1113         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1114         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1115         s->rtp_mode= 1;
1116         break;
1117     case CODEC_ID_LJPEG:
1118     case CODEC_ID_JPEGLS:
1119     case CODEC_ID_MJPEG:
1120         s->out_format = FMT_MJPEG;
1121         s->intra_only = 1; /* force intra only for jpeg */
1122         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1123         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1124         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1125         s->mjpeg_vsample[1] = 1;
1126         s->mjpeg_vsample[2] = 1;
1127         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1128         s->mjpeg_hsample[1] = 1;
1129         s->mjpeg_hsample[2] = 1;
1130         if (mjpeg_init(s) < 0)
1131             return -1;
1132         avctx->delay=0;
1133         s->low_delay=1;
1134         break;
1135     case CODEC_ID_H261:
1136         s->out_format = FMT_H261;
1137         avctx->delay=0;
1138         s->low_delay=1;
1139         break;
1140     case CODEC_ID_H263:
1141         if (h263_get_picture_format(s->width, s->height) == 7) {
1142             av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
1143             return -1;
1144         }
1145         s->out_format = FMT_H263;
1146         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1147         avctx->delay=0;
1148         s->low_delay=1;
1149         break;
1150     case CODEC_ID_H263P:
1151         s->out_format = FMT_H263;
1152         s->h263_plus = 1;
1153         /* Fx */
1154         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1155         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1156         s->modified_quant= s->h263_aic;
1157         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1158         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1159         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1160         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1161         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1162
1163         /* /Fx */
1164         /* These are just to be sure */
1165         avctx->delay=0;
1166         s->low_delay=1;
1167         break;
1168     case CODEC_ID_FLV1:
1169         s->out_format = FMT_H263;
1170         s->h263_flv = 2; /* format = 1; 11-bit codes */
1171         s->unrestricted_mv = 1;
1172         s->rtp_mode=0; /* don't allow GOB */
1173         avctx->delay=0;
1174         s->low_delay=1;
1175         break;
1176     case CODEC_ID_RV10:
1177         s->out_format = FMT_H263;
1178         avctx->delay=0;
1179         s->low_delay=1;
1180         break;
1181     case CODEC_ID_RV20:
1182         s->out_format = FMT_H263;
1183         avctx->delay=0;
1184         s->low_delay=1;
1185         s->modified_quant=1;
1186         s->h263_aic=1;
1187         s->h263_plus=1;
1188         s->loop_filter=1;
1189         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1190         break;
1191     case CODEC_ID_MPEG4:
1192         s->out_format = FMT_H263;
1193         s->h263_pred = 1;
1194         s->unrestricted_mv = 1;
1195         s->low_delay= s->max_b_frames ? 0 : 1;
1196         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1197         break;
1198     case CODEC_ID_MSMPEG4V1:
1199         s->out_format = FMT_H263;
1200         s->h263_msmpeg4 = 1;
1201         s->h263_pred = 1;
1202         s->unrestricted_mv = 1;
1203         s->msmpeg4_version= 1;
1204         avctx->delay=0;
1205         s->low_delay=1;
1206         break;
1207     case CODEC_ID_MSMPEG4V2:
1208         s->out_format = FMT_H263;
1209         s->h263_msmpeg4 = 1;
1210         s->h263_pred = 1;
1211         s->unrestricted_mv = 1;
1212         s->msmpeg4_version= 2;
1213         avctx->delay=0;
1214         s->low_delay=1;
1215         break;
1216     case CODEC_ID_MSMPEG4V3:
1217         s->out_format = FMT_H263;
1218         s->h263_msmpeg4 = 1;
1219         s->h263_pred = 1;
1220         s->unrestricted_mv = 1;
1221         s->msmpeg4_version= 3;
1222         s->flipflop_rounding=1;
1223         avctx->delay=0;
1224         s->low_delay=1;
1225         break;
1226     case CODEC_ID_WMV1:
1227         s->out_format = FMT_H263;
1228         s->h263_msmpeg4 = 1;
1229         s->h263_pred = 1;
1230         s->unrestricted_mv = 1;
1231         s->msmpeg4_version= 4;
1232         s->flipflop_rounding=1;
1233         avctx->delay=0;
1234         s->low_delay=1;
1235         break;
1236     case CODEC_ID_WMV2:
1237         s->out_format = FMT_H263;
1238         s->h263_msmpeg4 = 1;
1239         s->h263_pred = 1;
1240         s->unrestricted_mv = 1;
1241         s->msmpeg4_version= 5;
1242         s->flipflop_rounding=1;
1243         avctx->delay=0;
1244         s->low_delay=1;
1245         break;
1246     default:
1247         return -1;
1248     }
1249
1250     avctx->has_b_frames= !s->low_delay;
1251
1252     s->encoding = 1;
1253
1254     /* init */
1255     if (MPV_common_init(s) < 0)
1256         return -1;
1257
1258     if(s->modified_quant)
1259         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1260     s->progressive_frame=
1261     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1262     s->quant_precision=5;
1263
1264     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1265     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1266
1267 #ifdef CONFIG_H261_ENCODER
1268     if (s->out_format == FMT_H261)
1269         ff_h261_encode_init(s);
1270 #endif
1271     if (s->out_format == FMT_H263)
1272         h263_encode_init(s);
1273     if(s->msmpeg4_version)
1274         ff_msmpeg4_encode_init(s);
1275     if (s->out_format == FMT_MPEG1)
1276         ff_mpeg1_encode_init(s);
1277
1278     /* init q matrix */
1279     for(i=0;i<64;i++) {
1280         int j= s->dsp.idct_permutation[i];
1281         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1282             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1283             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1284         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1285             s->intra_matrix[j] =
1286             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1287         }else
1288         { /* mpeg1/2 */
1289             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1290             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1291         }
1292         if(s->avctx->intra_matrix)
1293             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1294         if(s->avctx->inter_matrix)
1295             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1296     }
1297
1298     /* precompute matrix */
1299     /* for mjpeg, we do include qscale in the matrix */
1300     if (s->out_format != FMT_MJPEG) {
1301         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1302                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1303         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1304                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1305     }
1306
1307     if(ff_rate_control_init(s) < 0)
1308         return -1;
1309
1310     return 0;
1311 }
1312
/* uninit the video encoder: tear down rate control, the common context,
   and any mjpeg state; counterpart of MPV_encode_init(). Always returns 0. */
int MPV_encode_end(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;

#ifdef STATS
    print_stats();
#endif

    ff_rate_control_uninit(s);

    MPV_common_end(s);
    /* mjpeg allocated extra tables in MPV_encode_init() via mjpeg_init() */
    if (s->out_format == FMT_MJPEG)
        mjpeg_close(s);

    av_freep(&avctx->extradata);

    return 0;
}
1331
1332 #endif //CONFIG_ENCODERS
1333
1334 void init_rl(RLTable *rl, int use_static)
1335 {
1336     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1337     uint8_t index_run[MAX_RUN+1];
1338     int last, run, level, start, end, i;
1339
1340     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1341     if(use_static && rl->max_level[0])
1342         return;
1343
1344     /* compute max_level[], max_run[] and index_run[] */
1345     for(last=0;last<2;last++) {
1346         if (last == 0) {
1347             start = 0;
1348             end = rl->last;
1349         } else {
1350             start = rl->last;
1351             end = rl->n;
1352         }
1353
1354         memset(max_level, 0, MAX_RUN + 1);
1355         memset(max_run, 0, MAX_LEVEL + 1);
1356         memset(index_run, rl->n, MAX_RUN + 1);
1357         for(i=start;i<end;i++) {
1358             run = rl->table_run[i];
1359             level = rl->table_level[i];
1360             if (index_run[run] == rl->n)
1361                 index_run[run] = i;
1362             if (level > max_level[run])
1363                 max_level[run] = level;
1364             if (run > max_run[level])
1365                 max_run[level] = run;
1366         }
1367         if(use_static)
1368             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1369         else
1370             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1371         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1372         if(use_static)
1373             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1374         else
1375             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1376         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1377         if(use_static)
1378             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1379         else
1380             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1381         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1382     }
1383 }
1384
/* Replicate the border pixels of a width x height image into a frame of
   w extra pixels on each side (needed for motion vectors that point
   outside the picture). 'wrap' is the linesize of the buffer. */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *top    = buf;
    uint8_t *bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int i;

    /* replicate the first row upward and the last row downward */
    for (i = 1; i <= w; i++) {
        memcpy(top - i * wrap, top, width);
        memcpy(bottom + i * wrap, bottom, width);
    }

    /* replicate the first/last pixel of every row left/right */
    for (row = buf, i = 0; i < height; i++, row += wrap) {
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
    }

    /* fill the four corner regions from the corresponding corner pixels */
    for (i = 1; i <= w; i++) {
        memset(top - i * wrap - w,        top[0],         w); /* top left */
        memset(top - i * wrap + width,    top[width-1],   w); /* top right */
        memset(bottom + i * wrap - w,     bottom[0],      w); /* bottom left */
        memset(bottom + i * wrap + width, bottom[width-1],w); /* bottom right */
    }
}
1413
1414 int ff_find_unused_picture(MpegEncContext *s, int shared){
1415     int i;
1416
1417     if(shared){
1418         for(i=0; i<MAX_PICTURE_COUNT; i++){
1419             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1420         }
1421     }else{
1422         for(i=0; i<MAX_PICTURE_COUNT; i++){
1423             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1424         }
1425         for(i=0; i<MAX_PICTURE_COUNT; i++){
1426             if(s->picture[i].data[0]==NULL) return i;
1427         }
1428     }
1429
1430     assert(0);
1431     return -1;
1432 }
1433
1434 static void update_noise_reduction(MpegEncContext *s){
1435     int intra, i;
1436
1437     for(intra=0; intra<2; intra++){
1438         if(s->dct_count[intra] > (1<<16)){
1439             for(i=0; i<64; i++){
1440                 s->dct_error_sum[intra][i] >>=1;
1441             }
1442             s->dct_count[intra] >>= 1;
1443         }
1444
1445         for(i=0; i<64; i++){
1446             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1447         }
1448     }
1449 }
1450
/**
 * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
 * Releases stale reference frames, allocates/selects the current picture
 * (decoder side), rotates last/next picture pointers and selects the
 * dequantizer for this frame. Returns 0 on success, -1 on allocation failure.
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                /* a referenced picture that is neither last nor next should
                   not exist anymore — free it and complain */
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                }
            }
        }
    }
alloc:
    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        /* B frames (except H264) and dropable frames are not used as
           references later */
        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
                        && !s->dropable ? 3 : 0;

        pic->coded_picture_number= s->coded_picture_number++;

        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE)
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);

  /* reference rotation does not apply to H264 (except SVQ3, which reuses
     this code path) — H264 manages its own reference lists */
  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/

    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);

    /* an inter frame without a reference: warn and allocate a dummy one by
       re-running the allocation block above */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: address only every second line and start on the
       bottom line for bottom fields */
    if(s->picture_structure!=PICT_FRAME){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            }
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }
  }

    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init isnt called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    /* dct_error_sum is only allocated when noise reduction is enabled */
    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }

#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
1577
/* generic function for encode/decode called after a frame has been coded/decoded:
   pads the picture edges for unrestricted MVs, updates last-picture-type
   bookkeeping, and (encoder) releases non-reference frames */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            /* chroma planes are half-size, hence the >>1 and EDGE_WIDTH/2 */
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    /* reset the FPU state after possible MMX usage in the DSP functions */
    emms_c();

    s->last_pict_type    = s->pict_type;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1627
1628 /**
1629  * draws an line from (ex, ey) -> (sx, sy).
1630  * @param w width of the image
1631  * @param h height of the image
1632  * @param stride stride/linesize of the image
1633  * @param color color of the arrow
1634  */
1635 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1636     int t, x, y, fr, f;
1637
1638     sx= clip(sx, 0, w-1);
1639     sy= clip(sy, 0, h-1);
1640     ex= clip(ex, 0, w-1);
1641     ey= clip(ey, 0, h-1);
1642
1643     buf[sy*stride + sx]+= color;
1644
1645     if(ABS(ex - sx) > ABS(ey - sy)){
1646         if(sx > ex){
1647             t=sx; sx=ex; ex=t;
1648             t=sy; sy=ey; ey=t;
1649         }
1650         buf+= sx + sy*stride;
1651         ex-= sx;
1652         f= ((ey-sy)<<16)/ex;
1653         for(x= 0; x <= ex; x++){
1654             y = (x*f)>>16;
1655             fr= (x*f)&0xFFFF;
1656             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1657             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1658         }
1659     }else{
1660         if(sy > ey){
1661             t=sx; sx=ex; ex=t;
1662             t=sy; sy=ey; ey=t;
1663         }
1664         buf+= sx + sy*stride;
1665         ey-= sy;
1666         if(ey) f= ((ex-sx)<<16)/ey;
1667         else   f= 0;
1668         for(y= 0; y <= ey; y++){
1669             x = (y*f)>>16;
1670             fr= (y*f)&0xFFFF;
1671             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1672             buf[y*stride + x+1]+= (color*         fr )>>16;;
1673         }
1674     }
1675 }
1676
1677 /**
1678  * draws an arrow from (ex, ey) -> (sx, sy).
1679  * @param w width of the image
1680  * @param h height of the image
1681  * @param stride stride/linesize of the image
1682  * @param color color of the arrow
1683  */
1684 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1685     int dx,dy;
1686
1687     sx= clip(sx, -100, w+100);
1688     sy= clip(sy, -100, h+100);
1689     ex= clip(ex, -100, w+100);
1690     ey= clip(ey, -100, h+100);
1691
1692     dx= ex - sx;
1693     dy= ey - sy;
1694
1695     if(dx*dx + dy*dy > 3*3){
1696         int rx=  dx + dy;
1697         int ry= -dx + dy;
1698         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1699
1700         //FIXME subpixel accuracy
1701         rx= ROUNDED_DIV(rx*3<<4, length);
1702         ry= ROUNDED_DIV(ry*3<<4, length);
1703
1704         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1705         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1706     }
1707     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1708 }
1709
1710 /**
1711  * prints debuging info for the given picture.
1712  */
1713 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1714
1715     if(!pict || !pict->mb_type) return;
1716
1717     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1718         int x,y;
1719
1720         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1721         switch (pict->pict_type) {
1722             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1723             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1724             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1725             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1726             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1727             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1728         }
1729         for(y=0; y<s->mb_height; y++){
1730             for(x=0; x<s->mb_width; x++){
1731                 if(s->avctx->debug&FF_DEBUG_SKIP){
1732                     int count= s->mbskip_table[x + y*s->mb_stride];
1733                     if(count>9) count=9;
1734                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1735                 }
1736                 if(s->avctx->debug&FF_DEBUG_QP){
1737                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1738                 }
1739                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1740                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1741                     //Type & MV direction
1742                     if(IS_PCM(mb_type))
1743                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1744                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1745                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1746                     else if(IS_INTRA4x4(mb_type))
1747                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1748                     else if(IS_INTRA16x16(mb_type))
1749                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1750                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1751                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1752                     else if(IS_DIRECT(mb_type))
1753                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1754                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1755                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1756                     else if(IS_GMC(mb_type))
1757                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1758                     else if(IS_SKIP(mb_type))
1759                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1760                     else if(!USES_LIST(mb_type, 1))
1761                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1762                     else if(!USES_LIST(mb_type, 0))
1763                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1764                     else{
1765                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1766                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1767                     }
1768
1769                     //segmentation
1770                     if(IS_8X8(mb_type))
1771                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1772                     else if(IS_16X8(mb_type))
1773                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1774                     else if(IS_8X16(mb_type))
1775                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1776                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1777                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1778                     else
1779                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1780
1781
1782                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1783                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1784                     else
1785                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1786                 }
1787 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1788             }
1789             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1790         }
1791     }
1792
1793     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1794         const int shift= 1 + s->quarter_sample;
1795         int mb_y;
1796         uint8_t *ptr;
1797         int i;
1798         int h_chroma_shift, v_chroma_shift;
1799         const int width = s->avctx->width;
1800         const int height= s->avctx->height;
1801         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1802         const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
1803         s->low_delay=0; //needed to see the vectors without trashing the buffers
1804
1805         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1806         for(i=0; i<3; i++){
1807             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1808             pict->data[i]= s->visualization_buffer[i];
1809         }
1810         pict->type= FF_BUFFER_TYPE_COPY;
1811         ptr= pict->data[0];
1812
1813         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1814             int mb_x;
1815             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1816                 const int mb_index= mb_x + mb_y*s->mb_stride;
1817                 if((s->avctx->debug_mv) && pict->motion_val){
1818                   int type;
1819                   for(type=0; type<3; type++){
1820                     int direction = 0;
1821                     switch (type) {
1822                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1823                                 continue;
1824                               direction = 0;
1825                               break;
1826                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1827                                 continue;
1828                               direction = 0;
1829                               break;
1830                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1831                                 continue;
1832                               direction = 1;
1833                               break;
1834                     }
1835                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1836                         continue;
1837
1838                     if(IS_8X8(pict->mb_type[mb_index])){
1839                       int i;
1840                       for(i=0; i<4; i++){
1841                         int sx= mb_x*16 + 4 + 8*(i&1);
1842                         int sy= mb_y*16 + 4 + 8*(i>>1);
1843                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1844                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1845                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1846                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1847                       }
1848                     }else if(IS_16X8(pict->mb_type[mb_index])){
1849                       int i;
1850                       for(i=0; i<2; i++){
1851                         int sx=mb_x*16 + 8;
1852                         int sy=mb_y*16 + 4 + 8*i;
1853                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1854                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1855                         int my=(pict->motion_val[direction][xy][1]>>shift);
1856
1857                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1858                             my*=2;
1859
1860                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1861                       }
1862                     }else if(IS_8X16(pict->mb_type[mb_index])){
1863                       int i;
1864                       for(i=0; i<2; i++){
1865                         int sx=mb_x*16 + 4 + 8*i;
1866                         int sy=mb_y*16 + 8;
1867                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1868                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1869                         int my=(pict->motion_val[direction][xy][1]>>shift);
1870
1871                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1872                             my*=2;
1873
1874                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1875                       }
1876                     }else{
1877                       int sx= mb_x*16 + 8;
1878                       int sy= mb_y*16 + 8;
1879                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1880                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1881                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1882                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1883                     }
1884                   }
1885                 }
1886                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1887                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1888                     int y;
1889                     for(y=0; y<8; y++){
1890                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1891                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1892                     }
1893                 }
1894                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1895                     int mb_type= pict->mb_type[mb_index];
1896                     uint64_t u,v;
1897                     int y;
1898 #define COLOR(theta, r)\
1899 u= (int)(128 + r*cos(theta*3.141592/180));\
1900 v= (int)(128 + r*sin(theta*3.141592/180));
1901
1902
1903                     u=v=128;
1904                     if(IS_PCM(mb_type)){
1905                         COLOR(120,48)
1906                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1907                         COLOR(30,48)
1908                     }else if(IS_INTRA4x4(mb_type)){
1909                         COLOR(90,48)
1910                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1911 //                        COLOR(120,48)
1912                     }else if(IS_DIRECT(mb_type)){
1913                         COLOR(150,48)
1914                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1915                         COLOR(170,48)
1916                     }else if(IS_GMC(mb_type)){
1917                         COLOR(190,48)
1918                     }else if(IS_SKIP(mb_type)){
1919 //                        COLOR(180,48)
1920                     }else if(!USES_LIST(mb_type, 1)){
1921                         COLOR(240,48)
1922                     }else if(!USES_LIST(mb_type, 0)){
1923                         COLOR(0,48)
1924                     }else{
1925                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1926                         COLOR(300,48)
1927                     }
1928
1929                     u*= 0x0101010101010101ULL;
1930                     v*= 0x0101010101010101ULL;
1931                     for(y=0; y<8; y++){
1932                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1933                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1934                     }
1935
1936                     //segmentation
1937                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1938                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1939                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1940                     }
1941                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1942                         for(y=0; y<16; y++)
1943                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1944                     }
1945                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
1946                         int dm= 1 << (mv_sample_log2-2);
1947                         for(i=0; i<4; i++){
1948                             int sx= mb_x*16 + 8*(i&1);
1949                             int sy= mb_y*16 + 8*(i>>1);
1950                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1951                             //FIXME bidir
1952                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
1953                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
1954                                 for(y=0; y<8; y++)
1955                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
1956                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
1957                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
1958                         }
1959                     }
1960
1961                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1962                         // hmm
1963                     }
1964                 }
1965                 s->mbskip_table[mb_index]=0;
1966             }
1967         }
1968     }
1969 }
1970
1971 #ifdef CONFIG_ENCODERS
1972
/**
 * Sum of absolute differences between a 16x16 block and a constant
 * reference value.
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int i, j;
    int sum= 0;

    for(j=0; j<16; j++){
        uint8_t *row= src + j*stride;
        for(i=0; i<16; i++)
            sum+= ABS(row[i] - ref);
    }

    return sum;
}
1985
1986 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1987     int x, y, w, h;
1988     int acc=0;
1989
1990     w= s->width &~15;
1991     h= s->height&~15;
1992
1993     for(y=0; y<h; y+=16){
1994         for(x=0; x<w; x+=16){
1995             int offset= x + y*stride;
1996             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1997             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1998             int sae = get_sae(src + offset, mean, stride);
1999
2000             acc+= sae + 500 < sad;
2001         }
2002     }
2003     return acc;
2004 }
2005
2006
/**
 * Queues one user-supplied input picture for encoding: validates/derives its
 * pts, stores it in an internal Picture slot (by reference when direct
 * rendering is possible, otherwise by copy) and appends it to
 * s->input_picture[] with encoding_delay reordering slots.
 * @param pic_arg user frame, or NULL to signal end of stream (flush)
 * @return 0 on success, -1 on invalid (non-monotonic) timestamp
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1;

    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        if(pts != AV_NOPTS_VALUE){
            /* timestamps must be strictly increasing */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;

                if(time <= last){
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            /* no pts given: extrapolate from the last one, or fall back to
               the display picture number */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                s->user_specified_pts=
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
            }else{
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    /* direct use of the user's buffer is only possible when the user
       preserves it for the b-frame delay and the linesizes match ours */
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        /* reference the user's data planes directly, no copy */
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        int offset= 16;   /* edge offset into our internally allocated planes */
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        if(   pic->data[0] + offset == pic_arg->data[0]
           && pic->data[1] + offset == pic_arg->data[1]
           && pic->data[2] + offset == pic_arg->data[2]){
       // empty
        }else{
            /* copy the user's planes into our buffer, honoring chroma subsampling */
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i] + offset;

                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifiying pic_arg
  }

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    /* NULL pic (flush) is stored as well, marking the end of input */
    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
2110
2111 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2112     int x, y, plane;
2113     int score=0;
2114     int64_t score64=0;
2115
2116     for(plane=0; plane<3; plane++){
2117         const int stride= p->linesize[plane];
2118         const int bw= plane ? 1 : 2;
2119         for(y=0; y<s->mb_height*bw; y++){
2120             for(x=0; x<s->mb_width*bw; x++){
2121                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2122                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2123
2124                 switch(s->avctx->frame_skip_exp){
2125                     case 0: score= FFMAX(score, v); break;
2126                     case 1: score+= ABS(v);break;
2127                     case 2: score+= v*v;break;
2128                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2129                     case 4: score64+= v*v*(int64_t)(v*v);break;
2130                 }
2131             }
2132         }
2133     }
2134
2135     if(score) score64= score;
2136
2137     if(score64 < s->avctx->frame_skip_threshold)
2138         return 1;
2139     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2140         return 1;
2141     return 0;
2142 }
2143
2144 static int estimate_best_b_count(MpegEncContext *s){
2145     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2146     AVCodecContext *c= avcodec_alloc_context();
2147     AVFrame input[FF_MAX_B_FRAMES+2];
2148     const int scale= s->avctx->brd_scale;
2149     int i, j, out_size;
2150     int outbuf_size= s->width * s->height; //FIXME
2151     uint8_t *outbuf= av_malloc(outbuf_size);
2152     ImgReSampleContext *resample;
2153     int64_t best_rd= INT64_MAX;
2154     int best_b_count= -1;
2155     const int lambda2= s->lambda2;
2156
2157     c->width = s->width >> scale;
2158     c->height= s->height>> scale;
2159     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2160     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2161     c->mb_decision= s->avctx->mb_decision;
2162     c->me_cmp= s->avctx->me_cmp;
2163     c->mb_cmp= s->avctx->mb_cmp;
2164     c->me_sub_cmp= s->avctx->me_sub_cmp;
2165     c->pix_fmt = PIX_FMT_YUV420P;
2166     c->time_base= s->avctx->time_base;
2167     c->max_b_frames= s->max_b_frames;
2168
2169     if (avcodec_open(c, codec) < 0)
2170         return -1;
2171
2172     resample= img_resample_init(c->width, c->height, s->width, s->height); //FIXME use sws
2173
2174     for(i=0; i<s->max_b_frames+2; i++){
2175         int ysize= c->width*c->height;
2176         int csize= (c->width/2)*(c->height/2);
2177
2178         avcodec_get_frame_defaults(&input[i]);
2179         input[i].data[0]= av_malloc(ysize + 2*csize);
2180         input[i].data[1]= input[i].data[0] + ysize;
2181         input[i].data[2]= input[i].data[1] + csize;
2182         input[i].linesize[0]= c->width;
2183         input[i].linesize[1]=
2184         input[i].linesize[2]= c->width/2;
2185
2186         if(!i || s->input_picture[i-1])
2187             img_resample(resample, &input[i], i ? s->input_picture[i-1] : s->next_picture_ptr);
2188     }
2189
2190     for(j=0; j<s->max_b_frames+1; j++){
2191         int64_t rd=0;
2192
2193         if(!s->input_picture[j])
2194             break;
2195
2196         c->error[0]= c->error[1]= c->error[2]= 0;
2197
2198         input[0].pict_type= I_TYPE;
2199         input[0].quality= 2 * FF_QP2LAMBDA;
2200         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2201         rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2202
2203         for(i=0; i<s->max_b_frames+1; i++){
2204             int is_p= i % (j+1) == j || i==s->max_b_frames;
2205
2206             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2207             input[i+1].quality= s->rc_context.last_qscale_for[input[i+1].pict_type];
2208             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2209             rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2210         }
2211
2212         /* get the delayed frames */
2213         while(out_size){
2214             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2215             rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2216         }
2217
2218         rd += c->error[0] + c->error[1] + c->error[2];
2219
2220         if(rd < best_rd){
2221             best_rd= rd;
2222             best_b_count= j;
2223         }
2224     }
2225
2226     av_freep(&outbuf);
2227     avcodec_close(c);
2228     av_freep(&c);
2229     img_resample_close(resample);
2230
2231     for(i=0; i<s->max_b_frames+2; i++){
2232         av_freep(&input[i].data[0]);
2233     }
2234
2235     return best_b_count;
2236 }
2237
2238 static void select_input_picture(MpegEncContext *s){
2239     int i;
2240
2241     for(i=1; i<MAX_PICTURE_COUNT; i++)
2242         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2243     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2244
2245     /* set next picture type & ordering */
2246     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2247         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2248             s->reordered_input_picture[0]= s->input_picture[0];
2249             s->reordered_input_picture[0]->pict_type= I_TYPE;
2250             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2251         }else{
2252             int b_frames;
2253
2254             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2255                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2256                 //FIXME check that te gop check above is +-1 correct
2257 //av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2258
2259                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2260                         for(i=0; i<4; i++)
2261                             s->input_picture[0]->data[i]= NULL;
2262                         s->input_picture[0]->type= 0;
2263                     }else{
2264                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2265                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2266
2267                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2268                     }
2269
2270                     emms_c();
2271                     ff_vbv_update(s, 0);
2272
2273                     goto no_output_pic;
2274                 }
2275             }
2276
2277             if(s->flags&CODEC_FLAG_PASS2){
2278                 for(i=0; i<s->max_b_frames+1; i++){
2279                     int pict_num= s->input_picture[0]->display_picture_number + i;
2280
2281                     if(pict_num >= s->rc_context.num_entries)
2282                         break;
2283                     if(!s->input_picture[i]){
2284                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2285                         break;
2286                     }
2287
2288                     s->input_picture[i]->pict_type=
2289                         s->rc_context.entry[pict_num].new_pict_type;
2290                 }
2291             }
2292
2293             if(s->avctx->b_frame_strategy==0){
2294                 b_frames= s->max_b_frames;
2295                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2296             }else if(s->avctx->b_frame_strategy==1){
2297                 for(i=1; i<s->max_b_frames+1; i++){
2298                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2299                         s->input_picture[i]->b_frame_score=
2300                             get_intra_count(s, s->input_picture[i  ]->data[0],
2301                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2302                     }
2303                 }
2304                 for(i=0; i<s->max_b_frames+1; i++){
2305                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2306                 }
2307
2308                 b_frames= FFMAX(0, i-1);
2309
2310                 /* reset scores */
2311                 for(i=0; i<b_frames+1; i++){
2312                     s->input_picture[i]->b_frame_score=0;
2313                 }
2314             }else if(s->avctx->b_frame_strategy==2){
2315                 b_frames= estimate_best_b_count(s);
2316             }else{
2317                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2318                 b_frames=0;
2319             }
2320
2321             emms_c();
2322 //static int b_count=0;
2323 //b_count+= b_frames;
2324 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2325
2326             for(i= b_frames - 1; i>=0; i--){
2327                 int type= s->input_picture[i]->pict_type;
2328                 if(type && type != B_TYPE)
2329                     b_frames= i;
2330             }
2331             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2332                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2333             }
2334
2335             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2336               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2337                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2338               }else{
2339                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2340                     b_frames=0;
2341                 s->input_picture[b_frames]->pict_type= I_TYPE;
2342               }
2343             }
2344
2345             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2346                && b_frames
2347                && s->input_picture[b_frames]->pict_type== I_TYPE)
2348                 b_frames--;
2349
2350             s->reordered_input_picture[0]= s->input_picture[b_frames];
2351             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2352                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2353             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2354             for(i=0; i<b_frames; i++){
2355                 s->reordered_input_picture[i+1]= s->input_picture[i];
2356                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2357                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2358             }
2359         }
2360     }
2361 no_output_pic:
2362     if(s->reordered_input_picture[0]){
2363         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2364
2365         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2366
2367         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2368             // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
2369
2370             int i= ff_find_unused_picture(s, 0);
2371             Picture *pic= &s->picture[i];
2372
2373             /* mark us unused / free shared pic */
2374             for(i=0; i<4; i++)
2375                 s->reordered_input_picture[0]->data[i]= NULL;
2376             s->reordered_input_picture[0]->type= 0;
2377
2378             pic->reference              = s->reordered_input_picture[0]->reference;
2379
2380             alloc_picture(s, pic, 0);
2381
2382             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2383
2384             s->current_picture_ptr= pic;
2385         }else{
2386             // input is not a shared pix -> reuse buffer for current_pix
2387
2388             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2389                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2390
2391             s->current_picture_ptr= s->reordered_input_picture[0];
2392             for(i=0; i<4; i++){
2393                 s->new_picture.data[i]+=16;
2394             }
2395         }
2396         copy_picture(&s->current_picture, s->current_picture_ptr);
2397
2398         s->picture_number= s->new_picture.display_picture_number;
2399 //printf("dpn:%d\n", s->picture_number);
2400     }else{
2401        memset(&s->new_picture, 0, sizeof(Picture));
2402     }
2403 }
2404
/**
 * Encodes one video frame.
 * Loads and reorders the input picture, runs the actual encoder, then
 * performs rate-control bookkeeping (VBV stuffing, CBR vbv_delay patching).
 * @param avctx codec context (priv_data must be a MpegEncContext)
 * @param buf output buffer for the coded bitstream
 * @param buf_size size of buf in bytes
 * @param data input AVFrame (may produce no output while B-frame reordering fills up)
 * @return number of bytes written to buf (0 if no frame was output), or -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    /* only 4:2:0 planar input is supported by this encoder */
    if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
        av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
        return -1;
    }

    /* split the output buffer between the slice threads proportionally to
       the number of macroblock rows each thread encodes */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;

    /* pick the next picture to code (handles B-frame reordering) */
    select_input_picture(s);

    /* output? */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);

        encode_picture(s, s->picture_number);

        /* export per-frame statistics to the codec context */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (s->out_format == FMT_MJPEG)
            mjpeg_picture_trailer(s);

        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        /* accumulate the per-plane error (PSNR) stats */
        for(i=0; i<4; i++){
            s->current_picture_ptr->error[i]= s->current_picture.error[i];
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* VBV model update; a nonzero result is the number of stuffing bytes
           that must be appended to avoid buffer underflow */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                /* MPEG-1/2: zero bytes are legal stuffing */
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4: stuffing via a filler start code (0x000001C3)
                   followed by 0xFF bytes */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);

            /* vbv_delay in 90kHz clock ticks */
            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            /* patch the 16-bit vbv_delay field into the already-written
               picture header (it straddles 3 bytes) */
            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* no picture to output yet (e.g. B-frame delay); nothing was written */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    assert((s->frame_bits&7)==0);

    return s->frame_bits/8;
}
2526
2527 #endif //CONFIG_ENCODERS
2528
/**
 * Global motion compensation for the 1-warp-point case (MPEG-4 "gmc1").
 * Predicts one 16x16 luma block and, unless CODEC_FLAG_GRAY is set, the
 * two corresponding 8x8 chroma blocks from ref_picture using the
 * translational sprite offset in s->sprite_offset.
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0;

    /* split the sprite offset into an integer source position and a
       fractional part rescaled to 1/16-pel precision */
    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    /* clamp the source position; at the clamp limit the fractional part
       is dropped so no samples beyond the clamped row/column are read */
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;
    src_y = clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* 17x17 source needed for the interpolation; replicate edges if
           the block reaches outside the padded picture */
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }

    if((motion_x|motion_y)&7){
        /* nonzero sub-pel fraction: bilinear gmc1 filter, done as two
           8-pixel-wide halves */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        int dxy;

        /* fraction is a multiple of 1/2 pel: use the cheaper hpel copy */
        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* same procedure for chroma at half resolution */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            /* remember that Cb needed emulation: Cr shares the same
               position so it needs it too */
            emu=1;
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    return;
}
2614
/**
 * Global motion compensation using the full affine warp
 * (sprite_offset + sprite_delta matrix).  Predicts one 16x16 luma block
 * and, unless CODEC_FLAG_GRAY is set, the two 8x8 chroma blocks.
 */
static inline void gmc_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int linesize, uvlinesize;
    const int a= s->sprite_warping_accuracy;
    int ox, oy;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0];

    /* warped source coordinates of the macroblock's top-left luma sample */
    ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
    oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;

    /* luma, left 8 columns; edge clipping happens inside dsp.gmc via
       h_edge_pos/v_edge_pos */
    s->dsp.gmc(dest_y, ptr, linesize, 16,
           ox,
           oy,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos, s->v_edge_pos);
    /* luma, right 8 columns: advance the warp origin by 8 samples in x,
       which shifts (ox,oy) by the first column of the delta matrix */
    s->dsp.gmc(dest_y+8, ptr, linesize, 16,
           ox + s->sprite_delta[0][0]*8,
           oy + s->sprite_delta[1][0]*8,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos, s->v_edge_pos);

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* chroma warp origin at half resolution, using the chroma sprite offset */
    ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
    oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;

    ptr = ref_picture[1];
    s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
           ox,
           oy,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos>>1, s->v_edge_pos>>1);

    ptr = ref_picture[2];
    s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
           ox,
           oy,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos>>1, s->v_edge_pos>>1);
}
2670
2671 /**
2672  * Copies a rectangular area of samples to a temporary buffer and replicates the boarder samples.
2673  * @param buf destination buffer
2674  * @param src source buffer
2675  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2676  * @param block_w width of block
2677  * @param block_h height of block
2678  * @param src_x x coordinate of the top left sample of the block in the source buffer
2679  * @param src_y y coordinate of the top left sample of the block in the source buffer
2680  * @param w width of the source buffer
2681  * @param h height of the source buffer
2682  */
2683 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
2684                                     int src_x, int src_y, int w, int h){
2685     int x, y;
2686     int start_y, start_x, end_y, end_x;
2687
2688     if(src_y>= h){
2689         src+= (h-1-src_y)*linesize;
2690         src_y=h-1;
2691     }else if(src_y<=-block_h){
2692         src+= (1-block_h-src_y)*linesize;
2693         src_y=1-block_h;
2694     }
2695     if(src_x>= w){
2696         src+= (w-1-src_x);
2697         src_x=w-1;
2698     }else if(src_x<=-block_w){
2699         src+= (1-block_w-src_x);
2700         src_x=1-block_w;
2701     }
2702
2703     start_y= FFMAX(0, -src_y);
2704     start_x= FFMAX(0, -src_x);
2705     end_y= FFMIN(block_h, h-src_y);
2706     end_x= FFMIN(block_w, w-src_x);
2707
2708     // copy existing part
2709     for(y=start_y; y<end_y; y++){
2710         for(x=start_x; x<end_x; x++){
2711             buf[x + y*linesize]= src[x + y*linesize];
2712         }
2713     }
2714
2715     //top
2716     for(y=0; y<start_y; y++){
2717         for(x=start_x; x<end_x; x++){
2718             buf[x + y*linesize]= buf[x + start_y*linesize];
2719         }
2720     }
2721
2722     //bottom
2723     for(y=end_y; y<block_h; y++){
2724         for(x=start_x; x<end_x; x++){
2725             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2726         }
2727     }
2728
2729     for(y=0; y<block_h; y++){
2730        //left
2731         for(x=0; x<start_x; x++){
2732             buf[x + y*linesize]= buf[start_x + y*linesize];
2733         }
2734
2735        //right
2736         for(x=end_x; x<block_w; x++){
2737             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2738         }
2739     }
2740 }
2741
2742 static inline int hpel_motion(MpegEncContext *s,
2743                                   uint8_t *dest, uint8_t *src,
2744                                   int field_based, int field_select,
2745                                   int src_x, int src_y,
2746                                   int width, int height, int stride,
2747                                   int h_edge_pos, int v_edge_pos,
2748                                   int w, int h, op_pixels_func *pix_op,
2749                                   int motion_x, int motion_y)
2750 {
2751     int dxy;
2752     int emu=0;
2753
2754     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2755     src_x += motion_x >> 1;
2756     src_y += motion_y >> 1;
2757
2758     /* WARNING: do no forget half pels */
2759     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2760     if (src_x == width)
2761         dxy &= ~1;
2762     src_y = clip(src_y, -16, height);
2763     if (src_y == height)
2764         dxy &= ~2;
2765     src += src_y * stride + src_x;
2766
2767     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2768         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2769            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2770             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2771                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2772             src= s->edge_emu_buffer;
2773             emu=1;
2774         }
2775     }
2776     if(field_select)
2777         src += s->linesize;
2778     pix_op[dxy](dest, src, stride, h);
2779     return emu;
2780 }
2781
2782 static inline int hpel_motion_lowres(MpegEncContext *s,
2783                                   uint8_t *dest, uint8_t *src,
2784                                   int field_based, int field_select,
2785                                   int src_x, int src_y,
2786                                   int width, int height, int stride,
2787                                   int h_edge_pos, int v_edge_pos,
2788                                   int w, int h, h264_chroma_mc_func *pix_op,
2789                                   int motion_x, int motion_y)
2790 {
2791     const int lowres= s->avctx->lowres;
2792     const int s_mask= (2<<lowres)-1;
2793     int emu=0;
2794     int sx, sy;
2795
2796     if(s->quarter_sample){
2797         motion_x/=2;
2798         motion_y/=2;
2799     }
2800
2801     sx= motion_x & s_mask;
2802     sy= motion_y & s_mask;
2803     src_x += motion_x >> (lowres+1);
2804     src_y += motion_y >> (lowres+1);
2805
2806     src += src_y * stride + src_x;
2807
2808     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2809        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2810         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2811                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2812         src= s->edge_emu_buffer;
2813         emu=1;
2814     }
2815
2816     sx <<= 2 - lowres;
2817     sy <<= 2 - lowres;
2818     if(field_select)
2819         src += s->linesize;
2820     pix_op[lowres](dest, src, stride, h, sx, sy);
2821     return emu;
2822 }
2823
/* apply one mpeg motion vector to the three components */
/**
 * Half-pel motion compensation of one macroblock (luma 16xh plus chroma,
 * chroma subsampling depending on the output format).
 * @param field_based nonzero for field prediction (doubles the line stride)
 * @param bottom_field write to the bottom field of the destination
 * @param field_select read from the bottom field of the reference
 */
static always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;

#if 0
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* luma: half-pel operator index and integer source position */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    /* derive the chroma vector / operator from the luma vector, per format */
    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* buggy encoders round the chroma mv differently; mimic them */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* 4:2:0: chroma subsampled in both directions */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            /* MPEG-1/2 vectors must not point outside the picture; just skip */
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            /* otherwise read through the edge-replicated emulation buffer */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
    if(s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
#endif
}
2940
/* apply one mpeg motion vector to the three components */
/**
 * Lowres variant of mpeg_motion: compensates one macroblock at the
 * reduced resolution selected by s->avctx->lowres, using the h264 chroma
 * MC functions for interpolation.
 */
static always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;          /* luma block is 2*block_s wide */
    const int s_mask= (2<<lowres)-1;       /* mask of the sub-pel fraction */
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        /* compensate for the field offset of the downscaled samples */
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split luma vector into integer position and sub-pel fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    /* derive the chroma position/fraction per output format */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            /* source reaches outside the padded picture: replicate edges */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale the fractions to the 1/8-pel range expected by pix_op */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
3037
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Overlapped block motion compensation blend for one 8x8 block.
 * Each output sample is a weighted sum of the mid prediction and the
 * top/left/right/bottom neighbour predictions; the fixed weights of each
 * OBMC_FILTER call sum to 8 and the result is rounded (+4) and shifted
 * down by 3, so the blend is normalized.
 * @param dst destination, 8x8 samples with the given stride
 * @param src the 5 predictions: [0]=mid [1]=top [2]=left [3]=right [4]=bottom
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    /* the weight table below is laid out row by row; OBMC_FILTER4 fills a
       2x2 group of samples with the same weights */
    x=0;
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    x+= stride;
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    x+= stride;
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    x+= 2*stride;
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    x+= 2*stride;
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    x+= stride;
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
}
3089
/**
 * Overlapped block motion compensation (OBMC) for one 8x8 luma block.
 * The block is predicted up to 5 times (with its own vector and the
 * top/left/right/bottom neighbours' vectors) and the predictions are
 * blended with the OBMC window weights by put_obmc().
 * @param dest destination for the blended 8x8 block
 * @param src  reference luma plane
 * @param src_x, src_y top-left position of the block in the reference
 * @param pix_op halfpel put function table for 8-pixel-wide blocks
 * @param mv the 5 motion vectors: mid, top, left, right, bottom
 */
static inline void obmc_motion(MpegEncContext *s,
                               uint8_t *dest, uint8_t *src,
                               int src_x, int src_y,
                               op_pixels_func *pix_op,
                               int16_t mv[5][2]/* mid top left right bottom*/)
#define MID    0
{
    int i;
    uint8_t *ptr[5];

    /* OBMC is only used with halfpel motion vectors */
    assert(s->quarter_sample==0);

    for(i=0; i<5; i++){
        /* a neighbour whose vector equals the mid vector reuses the mid
         * prediction instead of recomputing it (i==MID never matches) */
        if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
            ptr[i]= ptr[MID];
        }else{
            /* each prediction gets its own 8x8 slot in the scratchpad */
            ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
            hpel_motion(s, ptr[i], src, 0, 0,
                        src_x, src_y,
                        s->width, s->height, s->linesize,
                        s->h_edge_pos, s->v_edge_pos,
                        8, 8, pix_op,
                        mv[i][0], mv[i][1]);
        }
    }

    /* blend the 5 predictions into dest */
    put_obmc(dest, ptr, s->linesize);
}
3119
/**
 * Quarter-pel motion compensation of one luma block (16x16 frame or
 * 16x8 field) plus the corresponding chroma.
 * Luma is interpolated at quarter-pel precision via qpix_op; chroma is
 * predicted at half-pel precision via pix_op from a halved luma vector,
 * with optional workarounds reproducing known buggy encoders' rounding.
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    /* the quarter-pel fraction selects one of the 16 luma interpolators */
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    /* field pictures use every second line, so double the strides */
    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    /* derive the half-resolution chroma vector */
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        /* reproduce a specific buggy encoder's chroma rounding */
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    /* reduce to half-pel precision; |(mx&1) keeps any sub-pel remainder
     * as a half-pel step */
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the 17x17 luma source area sticks out of the padded picture,
     * build edge-emulated copies of the source blocks first */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        /* the bottom field starts one (frame) line below the top field */
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3206
3207 inline int ff_h263_round_chroma(int x){
3208     if (x >= 0)
3209         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3210     else {
3211         x = -x;
3212         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3213     }
3214 }
3215
/**
 * h263 chroma 4mv motion compensation.
 * Derives a single chroma vector from the sum of the four luma vectors
 * (using the special h263 rounding), clips it into the picture and
 * predicts both chroma planes at half-pel precision.
 * @param mx sum of the 4 luma motion vector x components
 * @param my sum of the 4 luma motion vector y components
 */
static inline void chroma_4mv_motion(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     op_pixels_func *pix_op,
                                     int mx, int my){
    int dxy, emu=0, src_x, src_y, offset;
    uint8_t *ptr;

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    /* split into integer position and halfpel fraction; dxy selects the
     * halfpel interpolation function */
    dxy = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    /* clip into the picture; at the right/bottom border also drop the
     * halfpel fraction so nothing beyond the edge is read */
    src_x = clip(src_x, -8, s->width/2);
    if (src_x == s->width/2)
        dxy &= ~1;
    src_y = clip(src_y, -8, s->height/2);
    if (src_y == s->height/2)
        dxy &= ~2;

    offset = (src_y * (s->uvlinesize)) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* source area overlaps the unpadded picture edge: work from a
         * padded copy in the scratch buffer */
        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    ptr = ref_picture[2] + offset;
    /* Cr uses the same position, so it needs edge emulation iff Cb did */
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}
3264
/**
 * Chroma 4mv motion compensation for lowres (downscaled) decoding.
 * Like chroma_4mv_motion() but operates on a picture downscaled by
 * 2^lowres and uses the h264 bilinear chroma MC functions, which take
 * the sub-pel fraction as separate sx/sy arguments.
 * @param mx sum of the 4 luma motion vector x components
 * @param my sum of the 4 luma motion vector y components
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;                      // chroma block size at this scale
    const int s_mask= (2<<lowres)-1;                   // sub-pel fraction mask
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    /* quarter-pel vectors are first reduced to half-pel units */
    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    /* split into sub-pel fraction (sx/sy) and integer position */
    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* source area overlaps the unpadded edge: use a padded copy */
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    /* scale the fraction to the eighth-pel units used by the h264
     * chroma MC functions */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    ptr = ref_picture[2] + offset;
    /* same position as Cb, so reuse its edge-emulation decision */
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
3314
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    /* OBMC path: each 8x8 block is predicted from its own and its
     * neighbours' vectors */
    if(s->obmc && s->pict_type != B_TYPE){
        /* mv_cache holds the 2x2 inner block vectors (rows/cols 1..2)
         * surrounded by the neighbouring macroblocks' vectors */
        int16_t mv_cache[4][4][2];
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
        const int mot_stride= s->b8_stride;
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;

        assert(!s->mb_skipped);

        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
        /* row 3 (below-MB neighbours) deliberately duplicates this MB's
         * bottom row: the MB below is not decoded yet at this point */
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);

        /* top neighbours: reuse our own top row if unavailable or intra */
        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
        }else{
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
        }

        /* left neighbours */
        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
        }else{
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
        }

        /* right neighbours */
        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
        }else{
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
        }

        mx = 0;
        my = 0;
        for(i=0;i<4;i++) {
            const int x= (i&1)+1;
            const int y= (i>>1)+1;
            int16_t mv[5][2]= {
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
            //FIXME cleanup
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                        pix_op[1],
                        mv);

            /* accumulate the 4 own vectors for the chroma vector */
            mx += mv[0][0];
            my += mv[0][1];
        }
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);

        return;
    }

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        /* one vector for the whole macroblock */
        if(s->mcsel){
            /* global motion compensation (sprites) */
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else if(s->mspel){
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else
        {
            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_8X8:
        /* one vector per 8x8 luma block; chroma uses their rounded sum */
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;

                /* WARNING: do not forget half pels */
                src_x = clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;

                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                /* halve to half-pel units before summing for chroma */
                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                            ref_picture[0], 0, 0,
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos, s->v_edge_pos,
                            8, 8, pix_op[1],
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* frame picture: predict each field separately */
            if(s->quarter_sample){
                for(i=0; i<2; i++){
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
                }
            }else{
                /* top field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 0, s->field_select[dir][0],
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 1, s->field_select[dir][1],
                            ref_picture, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        } else {
            /* second field referencing the opposite parity: that field is
             * in the current frame, not the reference frame */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_16X8:
        /* two vectors, one per 16x8 half of the macroblock */
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                /* opposite parity of a second field is in the current frame */
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);

            dest_y += 16*s->linesize;
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual prime: average the same- and opposite-parity predictions */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                }
                /* after put we make avg of the same block */
                pix_op = s->dsp.avg_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);

                // after put we make avg of the same block
                pix_op=s->dsp.avg_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3563
/**
 * motion compensation of a single macroblock, lowres (downscaled) variant
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op bilinear (h264-style) chroma motion compensation functions
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;   // 8x8-block size at this downscale

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        /* one vector for the whole macroblock */
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        /* one vector per 8x8 luma block; chroma uses their rounded sum */
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* second field referencing the opposite parity: that field is
             * in the current frame, not the reference frame */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        /* two vectors, one per 16x8 half of the macroblock */
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                /* opposite parity of a second field is in the current frame */
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual prime: average the same- and opposite-parity predictions */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                /* after put we make avg of the same block */
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3689
/* put block[] to dest[]: dequantize the intra block in place, then
 * write (not add) the inverse transform result to dest */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
3697
3698 /* add block[] to dest[] */
3699 static inline void add_dct(MpegEncContext *s,
3700                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3701 {
3702     if (s->block_last_index[i] >= 0) {
3703         s->dsp.idct_add (dest, line_size, block);
3704     }
3705 }
3706
3707 static inline void add_dequant_dct(MpegEncContext *s,
3708                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3709 {
3710     if (s->block_last_index[i] >= 0) {
3711         s->dct_unquantize_inter(s, block, i, qscale);
3712
3713         s->dsp.idct_add (dest, line_size, block);
3714     }
3715 }
3716
3717 /**
3718  * cleans dc, ac, coded_block for the current non intra MB
3719  */
3720 void ff_clean_intra_table_entries(MpegEncContext *s)
3721 {
3722     int wrap = s->b8_stride;
3723     int xy = s->block_index[0];
3724
3725     s->dc_val[0][xy           ] =
3726     s->dc_val[0][xy + 1       ] =
3727     s->dc_val[0][xy     + wrap] =
3728     s->dc_val[0][xy + 1 + wrap] = 1024;
3729     /* ac pred */
3730     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3731     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3732     if (s->msmpeg4_version>=3) {
3733         s->coded_block[xy           ] =
3734         s->coded_block[xy + 1       ] =
3735         s->coded_block[xy     + wrap] =
3736         s->coded_block[xy + 1 + wrap] = 0;
3737     }
3738     /* chroma */
3739     wrap = s->mb_stride;
3740     xy = s->mb_x + s->mb_y * wrap;
3741     s->dc_val[1][xy] =
3742     s->dc_val[2][xy] = 1024;
3743     /* ac pred */
3744     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3745     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3746
3747     s->mbintra_table[xy]= 0;
3748 }
3749
3750 /* generic function called after a macroblock has been parsed by the
3751    decoder or after it has been encoded by the encoder.
3752
3753    Important variables used:
3754    s->mb_intra : true if intra macroblock
3755    s->mv_dir   : motion vector direction
3756    s->mv_type  : motion vector type
3757    s->mv       : motion vector
3758    s->interlaced_dct : true if interlaced dct used (mpeg2)
3759  */
3760 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3761 {
3762     int mb_x, mb_y;
3763     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3764 #ifdef HAVE_XVMC
3765     if(s->avctx->xvmc_acceleration){
3766         XVMC_decode_mb(s);//xvmc uses pblocks
3767         return;
3768     }
3769 #endif
3770
3771     mb_x = s->mb_x;
3772     mb_y = s->mb_y;
3773
3774     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3775        /* save DCT coefficients */
3776        int i,j;
3777        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3778        for(i=0; i<6; i++)
3779            for(j=0; j<64; j++)
3780                *dct++ = block[i][s->dsp.idct_permutation[j]];
3781     }
3782
3783     s->current_picture.qscale_table[mb_xy]= s->qscale;
3784
3785     /* update DC predictors for P macroblocks */
3786     if (!s->mb_intra) {
3787         if (s->h263_pred || s->h263_aic) {
3788             if(s->mbintra_table[mb_xy])
3789                 ff_clean_intra_table_entries(s);
3790         } else {
3791             s->last_dc[0] =
3792             s->last_dc[1] =
3793             s->last_dc[2] = 128 << s->intra_dc_precision;
3794         }
3795     }
3796     else if (s->h263_pred || s->h263_aic)
3797         s->mbintra_table[mb_xy]=1;
3798
3799     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3800         uint8_t *dest_y, *dest_cb, *dest_cr;
3801         int dct_linesize, dct_offset;
3802         op_pixels_func (*op_pix)[4];
3803         qpel_mc_func (*op_qpix)[16];
3804         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3805         const int uvlinesize= s->current_picture.linesize[1];
3806         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3807         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3808
3809         /* avoid copy if macroblock skipped in last frame too */
3810         /* skip only during decoding as we might trash the buffers during encoding a bit */
3811         if(!s->encoding){
3812             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3813             const int age= s->current_picture.age;
3814
3815             assert(age);
3816
3817             if (s->mb_skipped) {
3818                 s->mb_skipped= 0;
3819                 assert(s->pict_type!=I_TYPE);
3820
3821                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3822                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3823
3824                 /* if previous was skipped too, then nothing to do !  */
3825                 if (*mbskip_ptr >= age && s->current_picture.reference){
3826                     return;
3827                 }
3828             } else if(!s->current_picture.reference){
3829                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3830                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3831             } else{
3832                 *mbskip_ptr = 0; /* not skipped */
3833             }
3834         }
3835
3836         dct_linesize = linesize << s->interlaced_dct;
3837         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3838
3839         if(readable){
3840             dest_y=  s->dest[0];
3841             dest_cb= s->dest[1];
3842             dest_cr= s->dest[2];
3843         }else{
3844             dest_y = s->b_scratchpad;
3845             dest_cb= s->b_scratchpad+16*linesize;
3846             dest_cr= s->b_scratchpad+32*linesize;
3847         }
3848
3849         if (!s->mb_intra) {
3850             /* motion handling */
3851             /* decoding or more than one mb_type (MC was already done otherwise) */
3852             if(!s->encoding){
3853                 if(lowres_flag){
3854                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3855
3856                     if (s->mv_dir & MV_DIR_FORWARD) {
3857                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3858                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3859                     }
3860                     if (s->mv_dir & MV_DIR_BACKWARD) {
3861                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3862                     }
3863                 }else{
3864                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
3865                         op_pix = s->dsp.put_pixels_tab;
3866                         op_qpix= s->dsp.put_qpel_pixels_tab;
3867                     }else{
3868                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3869                         op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3870                     }
3871                     if (s->mv_dir & MV_DIR_FORWARD) {
3872                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3873                         op_pix = s->dsp.avg_pixels_tab;
3874                         op_qpix= s->dsp.avg_qpel_pixels_tab;
3875                     }
3876                     if (s->mv_dir & MV_DIR_BACKWARD) {
3877                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3878                     }
3879                 }
3880             }
3881
3882             /* skip dequant / idct if we are really late ;) */
3883             if(s->hurry_up>1) goto skip_idct;
3884             if(s->avctx->skip_idct){
3885                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
3886                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
3887                    || s->avctx->skip_idct >= AVDISCARD_ALL)
3888                     goto skip_idct;
3889             }
3890
3891             /* add dct residue */
3892             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3893                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3894                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3895                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3896                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3897                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3898
3899                 if(!(s->flags&CODEC_FLAG_GRAY)){
3900                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3901                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3902                 }
3903             } else if(s->codec_id != CODEC_ID_WMV2){
3904                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
3905                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
3906                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
3907                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
3908
3909                 if(!(s->flags&CODEC_FLAG_GRAY)){
3910                     if(s->chroma_y_shift){//Chroma420
3911                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3912                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
3913                     }else{
3914                         //chroma422
3915                         dct_linesize = uvlinesize << s->interlaced_dct;
3916                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3917
3918                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
3919                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
3920                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
3921                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
3922                         if(!s->chroma_x_shift){//Chroma444
3923                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
3924                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
3925                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
3926                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
3927                         }
3928                     }
3929                 }//fi gray
3930             }
3931             else{
3932                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
3933             }
3934         } else {
3935             /* dct only in intra block */
3936             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
3937                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3938                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3939                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3940                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3941
3942                 if(!(s->flags&CODEC_FLAG_GRAY)){
3943                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3944                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3945                 }
3946             }else{
3947                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
3948                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
3949                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
3950                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
3951
3952                 if(!(s->flags&CODEC_FLAG_GRAY)){
3953                     if(s->chroma_y_shift){
3954                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
3955                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
3956                     }else{
3957
3958                         dct_linesize = uvlinesize << s->interlaced_dct;
3959                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3960
3961                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
3962                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
3963                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
3964                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
3965                         if(!s->chroma_x_shift){//Chroma444
3966                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
3967                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
3968                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
3969                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
3970                         }
3971                     }
3972                 }//gray
3973             }
3974         }
3975 skip_idct:
3976         if(!readable){
3977             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
3978             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
3979             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
3980         }
3981     }
3982 }
3983
3984 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
3985     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
3986     else                  MPV_decode_mb_internal(s, block, 0);
3987 }
3988
3989 #ifdef CONFIG_ENCODERS
3990
3991 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
3992 {
3993     static const char tab[64]=
3994         {3,2,2,1,1,1,1,1,
3995          1,1,1,1,1,1,1,1,
3996          1,1,1,1,1,1,1,1,
3997          0,0,0,0,0,0,0,0,
3998          0,0,0,0,0,0,0,0,
3999          0,0,0,0,0,0,0,0,
4000          0,0,0,0,0,0,0,0,
4001          0,0,0,0,0,0,0,0};
4002     int score=0;
4003     int run=0;
4004     int i;
4005     DCTELEM *block= s->block[n];
4006     const int last_index= s->block_last_index[n];
4007     int skip_dc;
4008
4009     if(threshold<0){
4010         skip_dc=0;
4011         threshold= -threshold;
4012     }else
4013         skip_dc=1;
4014
4015     /* are all which we could set to zero are allready zero? */
4016     if(last_index<=skip_dc - 1) return;
4017
4018     for(i=0; i<=last_index; i++){
4019         const int j = s->intra_scantable.permutated[i];
4020         const int level = ABS(block[j]);
4021         if(level==1){
4022             if(skip_dc && i==0) continue;
4023             score+= tab[run];
4024             run=0;
4025         }else if(level>1){
4026             return;
4027         }else{
4028             run++;
4029         }
4030     }
4031     if(score >= threshold) return;
4032     for(i=skip_dc; i<=last_index; i++){
4033         const int j = s->intra_scantable.permutated[i];
4034         block[j]=0;
4035     }
4036     if(block[0]) s->block_last_index[n]= 0;
4037     else         s->block_last_index[n]= -1;
4038 }
4039
4040 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4041 {
4042     int i;
4043     const int maxlevel= s->max_qcoeff;
4044     const int minlevel= s->min_qcoeff;
4045     int overflow=0;
4046
4047     if(s->mb_intra){
4048         i=1; //skip clipping of intra dc
4049     }else
4050         i=0;
4051
4052     for(;i<=last_index; i++){
4053         const int j= s->intra_scantable.permutated[i];
4054         int level = block[j];
4055
4056         if     (level>maxlevel){
4057             level=maxlevel;
4058             overflow++;
4059         }else if(level<minlevel){
4060             level=minlevel;
4061             overflow++;
4062         }
4063
4064         block[j]= level;
4065     }
4066
4067     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4068         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4069 }
4070
4071 #endif //CONFIG_ENCODERS
4072
4073 /**
4074  *
4075  * @param h is the normal height, this will be reduced automatically if needed for the last row
4076  */
4077 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4078     if (s->avctx->draw_horiz_band) {
4079         AVFrame *src;
4080         int offset[4];
4081
4082         if(s->picture_structure != PICT_FRAME){
4083             h <<= 1;
4084             y <<= 1;
4085             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4086         }
4087
4088         h= FFMIN(h, s->avctx->height - y);
4089
4090         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4091             src= (AVFrame*)s->current_picture_ptr;
4092         else if(s->last_picture_ptr)
4093             src= (AVFrame*)s->last_picture_ptr;
4094         else
4095             return;
4096
4097         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4098             offset[0]=
4099             offset[1]=
4100             offset[2]=
4101             offset[3]= 0;
4102         }else{
4103             offset[0]= y * s->linesize;;
4104             offset[1]=
4105             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4106             offset[3]= 0;
4107         }
4108
4109         emms_c();
4110
4111         s->avctx->draw_horiz_band(s->avctx, src, offset,
4112                                   y, s->picture_structure, h);
4113     }
4114 }
4115
4116 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4117     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4118     const int uvlinesize= s->current_picture.linesize[1];
4119     const int mb_size= 4 - s->avctx->lowres;
4120
4121     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4122     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4123     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4124     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4125     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4126     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4127     //block_index is not used by mpeg2, so it is not affected by chroma_format
4128
4129     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4130     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4131     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4132
4133     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4134     {
4135         s->dest[0] += s->mb_y *   linesize << mb_size;
4136         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4137         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4138     }
4139 }
4140
4141 #ifdef CONFIG_ENCODERS
4142
4143 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4144     int x, y;
4145 //FIXME optimize
4146     for(y=0; y<8; y++){
4147         for(x=0; x<8; x++){
4148             int x2, y2;
4149             int sum=0;
4150             int sqr=0;
4151             int count=0;
4152
4153             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4154                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4155                     int v= ptr[x2 + y2*stride];
4156                     sum += v;
4157                     sqr += v*v;
4158                     count++;
4159                 }
4160             }
4161             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4162         }
4163     }
4164 }
4165
/**
 * Encode one macroblock: (optionally) update the quantizer, fetch/emulate the
 * source pixels, motion compensate for inter blocks, forward DCT + quantize,
 * apply coefficient elimination, and finally entropy-code with the
 * codec-specific macroblock encoder.
 *
 * @param motion_x horizontal motion vector component passed on to the
 *                 codec-specific mb encoder
 * @param motion_y vertical motion vector component
 */
static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
    int16_t weight[6][64];
    DCTELEM orig[6][64];
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    int i;
    int skip_dct[6];
    int dct_offset   = s->linesize*8; //default for progressive frames
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int wrap_y, wrap_c;

    for(i=0; i<6; i++) skip_dct[i]=0;

    /* per-macroblock quantizer update for adaptive quantization / QP RD */
    if(s->adaptive_quant){
        const int last_qp= s->qscale;
        const int mb_xy= mb_x + mb_y*s->mb_stride;

        s->lambda= s->lambda_table[mb_xy];
        update_qscale(s);

        if(!(s->flags&CODEC_FLAG_QP_RD)){
            s->dquant= s->qscale - last_qp;

            if(s->out_format==FMT_H263){
                /* H.263-family syntax only allows small quantizer deltas */
                s->dquant= clip(s->dquant, -2, 2); //FIXME RD

                if(s->codec_id==CODEC_ID_MPEG4){
                    if(!s->mb_intra){
                        if(s->pict_type == B_TYPE){
                            /* B frames: dquant must be even; direct-mode MBs
                               cannot change the quantizer at all */
                            if(s->dquant&1)
                                s->dquant= (s->dquant/2)*2;
                            if(s->mv_dir&MV_DIRECT)
                                s->dquant= 0;
                        }
                        /* 8x8 MV macroblocks cannot carry a dquant either */
                        if(s->mv_type==MV_TYPE_8X8)
                            s->dquant=0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    }else if(s->flags&CODEC_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
    ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
    ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;

    /* macroblock sticks out of the picture: read from an edge-emulated copy */
    if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
        uint8_t *ebuf= s->edge_emu_buffer + 32;
        ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
        ptr_y= ebuf;
        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cb= ebuf+18*wrap_y;
        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cr= ebuf+18*wrap_y+8;
    }

    if (s->mb_intra) {
        /* decide between frame (progressive) and field (interlaced) DCT by
           comparing the ildct_cmp score of both orderings */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
                              +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;

            if(progressive_score > 0){
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
        }
    }else{
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        if ((!s->no_rounding) || s->pict_type==B_TYPE){
            op_pix = s->dsp.put_pixels_tab;
            op_qpix= s->dsp.put_qpel_pixels_tab;
        }else{
            op_pix = s->dsp.put_no_rnd_pixels_tab;
            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        /* build the motion-compensated prediction; backward prediction is
           averaged onto the forward one for bidirectional MBs */
        if (s->mv_dir & MV_DIR_FORWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
            op_pix = s->dsp.avg_pixels_tab;
            op_qpix= s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
        }

        /* frame/field DCT decision on the residual (source vs prediction) */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;

            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;

            if(progressive_score>0){
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);

                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
        }
        /* pre quantization: skip the DCT for blocks whose residual is small
           enough that it would quantize to (nearly) nothing anyway */
        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
            //FIXME optimize
            if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
        }
    }

    /* noise shaping needs the per-pixel weights and an unquantized copy of
       the coefficients for the refinement pass below */
    if(s->avctx->quantizer_noise_shaping){
        if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
        if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
        if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
        if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
    }

    /* DCT & quantize */
    assert(s->out_format!=FMT_MJPEG || s->qscale==8);
    {
        for(i=0;i<6;i++) {
            if(!skip_dct[i]){
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
            // FIXME we could decide to change to quantizer instead of clipping
            // JS: I don't think that would be a good idea it could lower quality instead
            //     of improve it. Just INTRADC clipping deserves changes in quantizer
                if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
            }else
                s->block_last_index[i]= -1;
        }
        if(s->avctx->quantizer_noise_shaping){
            for(i=0;i<6;i++) {
                if(!skip_dct[i]){
                    s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
                }
            }
        }

        /* drop blocks with only a few isolated small coefficients */
        if(s->luma_elim_threshold && !s->mb_intra)
            for(i=0; i<4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if(s->chroma_elim_threshold && !s->mb_intra)
            for(i=4; i<6; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if(s->flags & CODEC_FLAG_CBP_RD){
            for(i=0;i<6;i++) {
                if(s->block_last_index[i] == -1)
                    s->coded_score[i]= INT_MAX/256;
            }
        }
    }

    /* gray: force the chroma DC to mid-gray so decoded chroma is neutral */
    if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
        s->block_last_index[4]=
        s->block_last_index[5]= 0;
        s->block[4][0]=
        s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
    }

    //non c quantize code returns incorrect block_last_index FIXME
    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
        for(i=0; i<6; i++){
            int j;
            if(s->block_last_index[i]>0){
                for(j=63; j>0; j--){
                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
                }
                s->block_last_index[i]= j;
            }
        }
    }

    /* huffman encode */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case CODEC_ID_MPEG1VIDEO:
    case CODEC_ID_MPEG2VIDEO:
        mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MPEG4:
        mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MSMPEG4V2:
    case CODEC_ID_MSMPEG4V3:
    case CODEC_ID_WMV1:
        msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_WMV2:
         ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
#ifdef CONFIG_H261_ENCODER
    case CODEC_ID_H261:
        ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
#endif
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
    case CODEC_ID_RV10:
    case CODEC_ID_RV20:
        h263_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MJPEG:
        mjpeg_encode_mb(s, s->block); break;
    default:
        assert(0);
    }
}
4427
4428 #endif //CONFIG_ENCODERS
4429
4430 void ff_mpeg_flush(AVCodecContext *avctx){
4431     int i;
4432     MpegEncContext *s = avctx->priv_data;
4433
4434     if(s==NULL || s->picture==NULL)
4435         return;
4436
4437     for(i=0; i<MAX_PICTURE_COUNT; i++){
4438        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4439                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4440         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4441     }
4442     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4443
4444     s->mb_x= s->mb_y= 0;
4445
4446     s->parse_context.state= -1;
4447     s->parse_context.frame_start_found= 0;
4448     s->parse_context.overread= 0;
4449     s->parse_context.overread_index= 0;
4450     s->parse_context.index= 0;
4451     s->parse_context.last_index= 0;
4452     s->bitstream_buffer_size=0;
4453 }
4454
4455 #ifdef CONFIG_ENCODERS
4456 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4457 {
4458     const uint16_t *srcw= (uint16_t*)src;
4459     int words= length>>4;
4460     int bits= length&15;
4461     int i;
4462
4463     if(length==0) return;
4464
4465     if(words < 16){
4466         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4467     }else if(put_bits_count(pb)&7){
4468         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4469     }else{
4470         for(i=0; put_bits_count(pb)&31; i++)
4471             put_bits(pb, 8, src[i]);
4472         flush_put_bits(pb);
4473         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4474         skip_put_bytes(pb, 2*words-i);
4475     }
4476
4477     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4478 }
4479
4480 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4481     int i;
4482
4483     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4484
4485     /* mpeg1 */
4486     d->mb_skip_run= s->mb_skip_run;
4487     for(i=0; i<3; i++)
4488         d->last_dc[i]= s->last_dc[i];
4489
4490     /* statistics */
4491     d->mv_bits= s->mv_bits;
4492     d->i_tex_bits= s->i_tex_bits;
4493     d->p_tex_bits= s->p_tex_bits;
4494     d->i_count= s->i_count;
4495     d->f_count= s->f_count;
4496     d->b_count= s->b_count;
4497     d->skip_count= s->skip_count;
4498     d->misc_bits= s->misc_bits;
4499     d->last_bits= 0;
4500
4501     d->mb_skipped= 0;
4502     d->qscale= s->qscale;
4503     d->dquant= s->dquant;
4504 }
4505
4506 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4507     int i;
4508
4509     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4510     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4511
4512     /* mpeg1 */
4513     d->mb_skip_run= s->mb_skip_run;
4514     for(i=0; i<3; i++)
4515         d->last_dc[i]= s->last_dc[i];
4516
4517     /* statistics */
4518     d->mv_bits= s->mv_bits;
4519     d->i_tex_bits= s->i_tex_bits;
4520     d->p_tex_bits= s->p_tex_bits;
4521     d->i_count= s->i_count;
4522     d->f_count= s->f_count;
4523     d->b_count= s->b_count;
4524     d->skip_count= s->skip_count;
4525     d->misc_bits= s->misc_bits;
4526
4527     d->mb_intra= s->mb_intra;
4528     d->mb_skipped= s->mb_skipped;
4529     d->mv_type= s->mv_type;
4530     d->mv_dir= s->mv_dir;
4531     d->pb= s->pb;
4532     if(s->data_partitioning){
4533         d->pb2= s->pb2;
4534         d->tex_pb= s->tex_pb;
4535     }
4536     d->block= s->block;
4537     for(i=0; i<6; i++)
4538         d->block_last_index[i]= s->block_last_index[i];
4539     d->interlaced_dct= s->interlaced_dct;
4540     d->qscale= s->qscale;
4541 }
4542
/**
 * Trial-encode one macroblock with a given coding type for rate(-distortion)
 * macroblock decision. The candidate is written into one of two alternating
 * bitstream/block buffers; if its score beats *dmin the candidate becomes the
 * new best and the buffers are swapped.
 *
 * @param backup     context state to restore before this trial
 * @param best       receives the context state of the winning trial
 * @param next_block selects which of the two scratch buffer sets to use;
 *                   toggled when this trial wins
 * @param dmin       in/out: best (lowest) score so far
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int score;
    uint8_t *dest_backup[3];

    /* start this trial from the saved pre-encode state */
    copy_context_before_encode(s, backup, type);

    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }

    /* the second buffer set reconstructs into scratch memory so the current
       best reconstruction in s->dest is not overwritten */
    if(*next_block){
        memcpy(dest_backup, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);

    /* rate: total bits produced by this trial */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }

    /* full RD mode: decode the trial and add lambda-weighted distortion */
    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }

    if(*next_block){
        memcpy(s->dest, dest_backup, sizeof(s->dest));
    }

    if(score<*dmin){
        *dmin= score;
        *next_block^=1;

        copy_context_after_encode(best, s, type);
    }
}
4593
4594 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4595     uint32_t *sq = squareTbl + 256;
4596     int acc=0;
4597     int x,y;
4598
4599     if(w==16 && h==16)
4600         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4601     else if(w==8 && h==8)
4602         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4603
4604     for(y=0; y<h; y++){
4605         for(x=0; x<w; x++){
4606             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4607         }
4608     }
4609
4610     assert(acc>=0);
4611
4612     return acc;
4613 }
4614
4615 static int sse_mb(MpegEncContext *s){
4616     int w= 16;
4617     int h= 16;
4618
4619     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4620     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4621
4622     if(w==16 && h==16)
4623       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4624         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4625                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4626                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4627       }else{
4628         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4629                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4630                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4631       }
4632     else
4633         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4634                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4635                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4636 }
4637
4638 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4639     MpegEncContext *s= arg;
4640
4641
4642     s->me.pre_pass=1;
4643     s->me.dia_size= s->avctx->pre_dia_size;
4644     s->first_slice_line=1;
4645     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4646         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4647             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4648         }
4649         s->first_slice_line=0;
4650     }
4651
4652     s->me.pre_pass=0;
4653
4654     return 0;
4655 }
4656
4657 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4658     MpegEncContext *s= arg;
4659
4660     s->me.dia_size= s->avctx->dia_size;
4661     s->first_slice_line=1;
4662     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4663         s->mb_x=0; //for block init below
4664         ff_init_block_index(s);
4665         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4666             s->block_index[0]+=2;
4667             s->block_index[1]+=2;
4668             s->block_index[2]+=2;
4669             s->block_index[3]+=2;
4670
4671             /* compute motion vector & mb_type and store in context */
4672             if(s->pict_type==B_TYPE)
4673                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4674             else
4675                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4676         }
4677         s->first_slice_line=0;
4678     }
4679     return 0;
4680 }
4681
4682 static int mb_var_thread(AVCodecContext *c, void *arg){
4683     MpegEncContext *s= arg;
4684     int mb_x, mb_y;
4685
4686     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4687         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4688             int xx = mb_x * 16;
4689             int yy = mb_y * 16;
4690             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4691             int varc;
4692             int sum = s->dsp.pix_sum(pix, s->linesize);
4693
4694             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4695
4696             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4697             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4698             s->me.mb_var_sum_temp    += varc;
4699         }
4700     }
4701     return 0;
4702 }
4703
/**
 * Finish the current slice: merge MPEG-4 data partitions if used, emit
 * codec-specific stuffing bits, then byte-align and flush the bitstream
 * writer.  Also charges the alignment overhead to misc_bits for pass-1
 * rate control.
 */
static void write_slice_end(MpegEncContext *s){
    if(s->codec_id==CODEC_ID_MPEG4){
        if(s->partitioned_frame){
            /* join the separate motion/DC/texture partitions into s->pb */
            ff_mpeg4_merge_partitions(s);
        }

        /* MPEG-4 stuffing pattern so the decoder can detect the slice end */
        ff_mpeg4_stuffing(&s->pb);
    }else if(s->out_format == FMT_MJPEG){
        ff_mjpeg_stuffing(&s->pb);
    }

    align_put_bits(&s->pb);
    flush_put_bits(&s->pb);

    /* pass-1 stats: count the alignment/stuffing bits as misc overhead
     * (partitioned frames account for their bits elsewhere) */
    if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
        s->misc_bits+= get_bits_diff(s);
}
4721
4722 static int encode_thread(AVCodecContext *c, void *arg){
4723     MpegEncContext *s= arg;
4724     int mb_x, mb_y, pdif = 0;
4725     int i, j;
4726     MpegEncContext best_s, backup_s;
4727     uint8_t bit_buf[2][MAX_MB_BYTES];
4728     uint8_t bit_buf2[2][MAX_MB_BYTES];
4729     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4730     PutBitContext pb[2], pb2[2], tex_pb[2];
4731 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4732
4733     for(i=0; i<2; i++){
4734         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4735         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4736         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4737     }
4738
4739     s->last_bits= put_bits_count(&s->pb);
4740     s->mv_bits=0;
4741     s->misc_bits=0;
4742     s->i_tex_bits=0;
4743     s->p_tex_bits=0;
4744     s->i_count=0;
4745     s->f_count=0;
4746     s->b_count=0;
4747     s->skip_count=0;
4748
4749     for(i=0; i<3; i++){
4750         /* init last dc values */
4751         /* note: quant matrix value (8) is implied here */
4752         s->last_dc[i] = 128 << s->intra_dc_precision;
4753
4754         s->current_picture.error[i] = 0;
4755     }
4756     s->mb_skip_run = 0;
4757     memset(s->last_mv, 0, sizeof(s->last_mv));
4758
4759     s->last_mv_dir = 0;
4760
4761     switch(s->codec_id){
4762     case CODEC_ID_H263:
4763     case CODEC_ID_H263P:
4764     case CODEC_ID_FLV1:
4765         s->gob_index = ff_h263_get_gob_height(s);
4766         break;
4767     case CODEC_ID_MPEG4:
4768         if(s->partitioned_frame)
4769             ff_mpeg4_init_partitions(s);
4770         break;
4771     }
4772
4773     s->resync_mb_x=0;
4774     s->resync_mb_y=0;
4775     s->first_slice_line = 1;
4776     s->ptr_lastgob = s->pb.buf;
4777     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4778 //    printf("row %d at %X\n", s->mb_y, (int)s);
4779         s->mb_x=0;
4780         s->mb_y= mb_y;
4781
4782         ff_set_qscale(s, s->qscale);
4783         ff_init_block_index(s);
4784
4785         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4786             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4787             int mb_type= s->mb_type[xy];
4788 //            int d;
4789             int dmin= INT_MAX;
4790             int dir;
4791
4792             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4793                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4794                 return -1;
4795             }
4796             if(s->data_partitioning){
4797                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4798                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4799                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4800                     return -1;
4801                 }
4802             }
4803
4804             s->mb_x = mb_x;
4805             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4806             ff_update_block_index(s);
4807
4808 #ifdef CONFIG_H261_ENCODER
4809             if(s->codec_id == CODEC_ID_H261){
4810                 ff_h261_reorder_mb_index(s);
4811                 xy= s->mb_y*s->mb_stride + s->mb_x;
4812                 mb_type= s->mb_type[xy];
4813             }
4814 #endif
4815
4816             /* write gob / video packet header  */
4817             if(s->rtp_mode){
4818                 int current_packet_size, is_gob_start;
4819
4820                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4821
4822                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4823
4824                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4825
4826                 switch(s->codec_id){
4827                 case CODEC_ID_H263:
4828                 case CODEC_ID_H263P:
4829                     if(!s->h263_slice_structured)
4830                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4831                     break;
4832                 case CODEC_ID_MPEG2VIDEO:
4833                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4834                 case CODEC_ID_MPEG1VIDEO:
4835                     if(s->mb_skip_run) is_gob_start=0;
4836                     break;
4837                 }
4838
4839                 if(is_gob_start){
4840                     if(s->start_mb_y != mb_y || mb_x!=0){
4841                         write_slice_end(s);
4842
4843                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4844                             ff_mpeg4_init_partitions(s);
4845                         }
4846                     }
4847
4848                     assert((put_bits_count(&s->pb)&7) == 0);
4849                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4850
4851                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4852                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
4853                         int d= 100 / s->avctx->error_rate;
4854                         if(r % d == 0){
4855                             current_packet_size=0;
4856 #ifndef ALT_BITSTREAM_WRITER
4857                             s->pb.buf_ptr= s->ptr_lastgob;
4858 #endif
4859                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4860                         }
4861                     }
4862
4863                     if (s->avctx->rtp_callback){
4864                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
4865                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
4866                     }
4867
4868                     switch(s->codec_id){
4869                     case CODEC_ID_MPEG4:
4870                         ff_mpeg4_encode_video_packet_header(s);
4871                         ff_mpeg4_clean_buffers(s);
4872                     break;
4873                     case CODEC_ID_MPEG1VIDEO:
4874                     case CODEC_ID_MPEG2VIDEO:
4875                         ff_mpeg1_encode_slice_header(s);
4876                         ff_mpeg1_clean_buffers(s);
4877                     break;
4878                     case CODEC_ID_H263:
4879                     case CODEC_ID_H263P:
4880                         h263_encode_gob_header(s, mb_y);
4881                     break;
4882                     }
4883
4884                     if(s->flags&CODEC_FLAG_PASS1){
4885                         int bits= put_bits_count(&s->pb);
4886                         s->misc_bits+= bits - s->last_bits;
4887                         s->last_bits= bits;
4888                     }
4889
4890                     s->ptr_lastgob += current_packet_size;
4891                     s->first_slice_line=1;
4892                     s->resync_mb_x=mb_x;
4893                     s->resync_mb_y=mb_y;
4894                 }
4895             }
4896
4897             if(  (s->resync_mb_x   == s->mb_x)
4898                && s->resync_mb_y+1 == s->mb_y){
4899                 s->first_slice_line=0;
4900             }
4901
4902             s->mb_skipped=0;
4903             s->dquant=0; //only for QP_RD
4904
4905             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
4906                 int next_block=0;
4907                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4908
4909                 copy_context_before_encode(&backup_s, s, -1);
4910                 backup_s.pb= s->pb;
4911                 best_s.data_partitioning= s->data_partitioning;
4912                 best_s.partitioned_frame= s->partitioned_frame;
4913                 if(s->data_partitioning){
4914                     backup_s.pb2= s->pb2;
4915                     backup_s.tex_pb= s->tex_pb;
4916                 }
4917
4918                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
4919                     s->mv_dir = MV_DIR_FORWARD;
4920                     s->mv_type = MV_TYPE_16X16;
4921                     s->mb_intra= 0;
4922                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4923                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4924                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
4925                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4926                 }
4927                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
4928                     s->mv_dir = MV_DIR_FORWARD;
4929                     s->mv_type = MV_TYPE_FIELD;
4930                     s->mb_intra= 0;
4931                     for(i=0; i<2; i++){
4932                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4933                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4934                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4935                     }
4936                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
4937                                  &dmin, &next_block, 0, 0);
4938                 }
4939                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
4940                     s->mv_dir = MV_DIR_FORWARD;
4941                     s->mv_type = MV_TYPE_16X16;
4942                     s->mb_intra= 0;
4943                     s->mv[0][0][0] = 0;
4944                     s->mv[0][0][1] = 0;
4945                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
4946                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4947                 }
4948                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
4949                     s->mv_dir = MV_DIR_FORWARD;
4950                     s->mv_type = MV_TYPE_8X8;
4951                     s->mb_intra= 0;
4952                     for(i=0; i<4; i++){
4953                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4954                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4955                     }
4956                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
4957                                  &dmin, &next_block, 0, 0);
4958                 }
4959                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
4960                     s->mv_dir = MV_DIR_FORWARD;
4961                     s->mv_type = MV_TYPE_16X16;
4962                     s->mb_intra= 0;
4963                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4964                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4965                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
4966                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4967                 }
4968                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
4969                     s->mv_dir = MV_DIR_BACKWARD;
4970                     s->mv_type = MV_TYPE_16X16;
4971                     s->mb_intra= 0;
4972                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4973                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4974                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
4975                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
4976                 }
4977                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
4978                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4979                     s->mv_type = MV_TYPE_16X16;
4980                     s->mb_intra= 0;
4981                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4982                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4983                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4984                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4985                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
4986                                  &dmin, &next_block, 0, 0);
4987                 }
4988                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
4989                     int mx= s->b_direct_mv_table[xy][0];
4990                     int my= s->b_direct_mv_table[xy][1];
4991
4992                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4993                     s->mb_intra= 0;
4994                     ff_mpeg4_set_direct_mv(s, mx, my);
4995                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
4996                                  &dmin, &next_block, mx, my);
4997                 }
4998                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
4999                     s->mv_dir = MV_DIR_FORWARD;
5000                     s->mv_type = MV_TYPE_FIELD;
5001                     s->mb_intra= 0;
5002                     for(i=0; i<2; i++){
5003                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5004                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5005                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5006                     }
5007                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5008                                  &dmin, &next_block, 0, 0);
5009                 }
5010                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5011                     s->mv_dir = MV_DIR_BACKWARD;
5012                     s->mv_type = MV_TYPE_FIELD;
5013                     s->mb_intra= 0;
5014                     for(i=0; i<2; i++){
5015                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5016                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5017                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5018                     }
5019                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5020                                  &dmin, &next_block, 0, 0);
5021                 }
5022                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5023                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5024                     s->mv_type = MV_TYPE_FIELD;
5025                     s->mb_intra= 0;
5026                     for(dir=0; dir<2; dir++){
5027                         for(i=0; i<2; i++){
5028                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5029                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5030                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5031                         }
5032                     }
5033                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5034                                  &dmin, &next_block, 0, 0);
5035                 }
5036                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5037                     s->mv_dir = 0;
5038                     s->mv_type = MV_TYPE_16X16;
5039                     s->mb_intra= 1;
5040                     s->mv[0][0][0] = 0;
5041                     s->mv[0][0][1] = 0;
5042                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5043                                  &dmin, &next_block, 0, 0);
5044                     if(s->h263_pred || s->h263_aic){
5045                         if(best_s.mb_intra)
5046                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5047                         else
5048                             ff_clean_intra_table_entries(s); //old mode?
5049                     }
5050                 }
5051
5052                 if(s->flags & CODEC_FLAG_QP_RD){
5053                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
5054                         const int last_qp= backup_s.qscale;
5055                         int dquant, dir, qp, dc[6];
5056                         DCTELEM ac[6][16];
5057                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5058
5059                         assert(backup_s.dquant == 0);
5060
5061                         //FIXME intra
5062                         s->mv_dir= best_s.mv_dir;
5063                         s->mv_type = MV_TYPE_16X16;
5064                         s->mb_intra= best_s.mb_intra;
5065                         s->mv[0][0][0] = best_s.mv[0][0][0];
5066                         s->mv[0][0][1] = best_s.mv[0][0][1];
5067                         s->mv[1][0][0] = best_s.mv[1][0][0];
5068                         s->mv[1][0][1] = best_s.mv[1][0][1];
5069
5070                         dir= s->pict_type == B_TYPE ? 2 : 1;
5071                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
5072                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
5073                             qp= last_qp + dquant;
5074                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5075                                 break;
5076                             backup_s.dquant= dquant;
5077                             if(s->mb_intra && s->dc_val[0]){
5078                                 for(i=0; i<6; i++){
5079                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5080                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5081                                 }
5082                             }
5083
5084                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5085                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5086                             if(best_s.qscale != qp){
5087                                 if(s->mb_intra && s->dc_val[0]){
5088                                     for(i=0; i<6; i++){
5089                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5090                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5091                                     }
5092                                 }
5093                                 if(dir > 0 && dquant==dir){
5094                                     dquant= 0;
5095                                     dir= -dir;
5096                                 }else
5097                                     break;
5098                             }
5099                         }
5100                         qp= best_s.qscale;
5101                         s->current_picture.qscale_table[xy]= qp;
5102                     }
5103                 }
5104
5105                 copy_context_after_encode(s, &best_s, -1);
5106
5107                 pb_bits_count= put_bits_count(&s->pb);
5108                 flush_put_bits(&s->pb);
5109                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5110                 s->pb= backup_s.pb;
5111
5112                 if(s->data_partitioning){
5113                     pb2_bits_count= put_bits_count(&s->pb2);
5114                     flush_put_bits(&s->pb2);
5115                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5116                     s->pb2= backup_s.pb2;
5117
5118                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5119                     flush_put_bits(&s->tex_pb);
5120                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5121                     s->tex_pb= backup_s.tex_pb;
5122                 }
5123                 s->last_bits= put_bits_count(&s->pb);
5124
5125                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5126                     ff_h263_update_motion_val(s);
5127
5128                 if(next_block==0){ //FIXME 16 vs linesize16
5129                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5130                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5131                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5132                 }
5133
5134                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5135                     MPV_decode_mb(s, s->block);
5136             } else {
5137                 int motion_x, motion_y;
5138                 s->mv_type=MV_TYPE_16X16;
5139                 // only one MB-Type possible
5140
5141                 switch(mb_type){
5142                 case CANDIDATE_MB_TYPE_INTRA:
5143                     s->mv_dir = 0;
5144                     s->mb_intra= 1;
5145                     motion_x= s->mv[0][0][0] = 0;
5146                     motion_y= s->mv[0][0][1] = 0;
5147                     break;
5148                 case CANDIDATE_MB_TYPE_INTER:
5149                     s->mv_dir = MV_DIR_FORWARD;
5150                     s->mb_intra= 0;
5151                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5152                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5153                     break;
5154                 case CANDIDATE_MB_TYPE_INTER_I:
5155                     s->mv_dir = MV_DIR_FORWARD;
5156                     s->mv_type = MV_TYPE_FIELD;
5157                     s->mb_intra= 0;
5158                     for(i=0; i<2; i++){
5159                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5160                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5161                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5162                     }
5163                     motion_x = motion_y = 0;
5164                     break;
5165                 case CANDIDATE_MB_TYPE_INTER4V:
5166                     s->mv_dir = MV_DIR_FORWARD;
5167                     s->mv_type = MV_TYPE_8X8;
5168                     s->mb_intra= 0;
5169                     for(i=0; i<4; i++){
5170                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5171                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5172                     }
5173                     motion_x= motion_y= 0;
5174                     break;
5175                 case CANDIDATE_MB_TYPE_DIRECT:
5176                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5177                     s->mb_intra= 0;
5178                     motion_x=s->b_direct_mv_table[xy][0];
5179                     motion_y=s->b_direct_mv_table[xy][1];
5180                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5181                     break;
5182                 case CANDIDATE_MB_TYPE_BIDIR:
5183                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5184                     s->mb_intra= 0;
5185                     motion_x=0;
5186                     motion_y=0;
5187                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5188                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5189                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5190                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5191                     break;
5192                 case CANDIDATE_MB_TYPE_BACKWARD:
5193                     s->mv_dir = MV_DIR_BACKWARD;
5194                     s->mb_intra= 0;
5195                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5196                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5197                     break;
5198                 case CANDIDATE_MB_TYPE_FORWARD:
5199                     s->mv_dir = MV_DIR_FORWARD;
5200                     s->mb_intra= 0;
5201                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5202                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5203 //                    printf(" %d %d ", motion_x, motion_y);
5204                     break;
5205                 case CANDIDATE_MB_TYPE_FORWARD_I:
5206                     s->mv_dir = MV_DIR_FORWARD;
5207                     s->mv_type = MV_TYPE_FIELD;
5208                     s->mb_intra= 0;
5209                     for(i=0; i<2; i++){
5210                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5211                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5212                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5213                     }
5214                     motion_x=motion_y=0;
5215                     break;
5216                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5217                     s->mv_dir = MV_DIR_BACKWARD;
5218                     s->mv_type = MV_TYPE_FIELD;
5219                     s->mb_intra= 0;
5220                     for(i=0; i<2; i++){
5221                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5222                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5223                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5224                     }
5225                     motion_x=motion_y=0;
5226                     break;
5227                 case CANDIDATE_MB_TYPE_BIDIR_I:
5228                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5229                     s->mv_type = MV_TYPE_FIELD;
5230                     s->mb_intra= 0;
5231                     for(dir=0; dir<2; dir++){
5232                         for(i=0; i<2; i++){
5233                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5234                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5235                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5236                         }
5237                     }
5238                     motion_x=motion_y=0;
5239                     break;
5240                 default:
5241                     motion_x=motion_y=0; //gcc warning fix
5242                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5243                 }
5244
5245                 encode_mb(s, motion_x, motion_y);
5246
5247                 // RAL: Update last macroblock type
5248                 s->last_mv_dir = s->mv_dir;
5249
5250                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5251                     ff_h263_update_motion_val(s);
5252
5253                 MPV_decode_mb(s, s->block);
5254             }
5255
5256             /* clean the MV table in IPS frames for direct mode in B frames */
5257             if(s->mb_intra /* && I,P,S_TYPE */){
5258                 s->p_mv_table[xy][0]=0;
5259                 s->p_mv_table[xy][1]=0;
5260             }
5261
5262             if(s->flags&CODEC_FLAG_PSNR){
5263                 int w= 16;
5264                 int h= 16;
5265
5266                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5267                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5268
5269                 s->current_picture.error[0] += sse(
5270                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5271                     s->dest[0], w, h, s->linesize);
5272                 s->current_picture.error[1] += sse(
5273                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5274                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5275                 s->current_picture.error[2] += sse(
5276                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5277                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5278             }
5279             if(s->loop_filter){
5280                 if(s->out_format == FMT_H263)
5281                     ff_h263_loop_filter(s);
5282             }
5283 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5284         }
5285     }
5286
5287     //not beautiful here but we must write it before flushing so it has to be here
5288     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5289         msmpeg4_encode_ext_header(s);
5290
5291     write_slice_end(s);
5292
5293     /* Send the last GOB if RTP */
5294     if (s->avctx->rtp_callback) {
5295         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5296         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5297         /* Call the RTP callback to send the last GOB */
5298         emms_c();
5299         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5300     }
5301
5302     return 0;
5303 }
5304
/* Accumulate a per-thread statistic into the main context (dst) and reset
 * the worker's copy (src) so a later merge cannot double-count it. */
#define MERGE(field) dst->field += src->field; src->field=0
5306 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5307     MERGE(me.scene_change_score);
5308     MERGE(me.mc_mb_var_sum_temp);
5309     MERGE(me.mb_var_sum_temp);
5310 }
5311
5312 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5313     int i;
5314
5315     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5316     MERGE(dct_count[1]);
5317     MERGE(mv_bits);
5318     MERGE(i_tex_bits);
5319     MERGE(p_tex_bits);
5320     MERGE(i_count);
5321     MERGE(f_count);
5322     MERGE(b_count);
5323     MERGE(skip_count);
5324     MERGE(misc_bits);
5325     MERGE(error_count);
5326     MERGE(padding_bug_score);
5327     MERGE(current_picture.error[0]);
5328     MERGE(current_picture.error[1]);
5329     MERGE(current_picture.error[2]);
5330
5331     if(dst->avctx->noise_reduction){
5332         for(i=0; i<64; i++){
5333             MERGE(dct_error_sum[0][i]);
5334             MERGE(dct_error_sum[1][i]);
5335         }
5336     }
5337
5338     assert(put_bits_count(&src->pb) % 8 ==0);
5339     assert(put_bits_count(&dst->pb) % 8 ==0);
5340     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5341     flush_put_bits(&dst->pb);
5342 }
5343
5344 static void estimate_qp(MpegEncContext *s, int dry_run){
5345     if (!s->fixed_qscale)
5346         s->current_picture_ptr->quality=
5347         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5348
5349     if(s->adaptive_quant){
5350         switch(s->codec_id){
5351         case CODEC_ID_MPEG4:
5352             ff_clean_mpeg4_qscales(s);
5353             break;
5354         case CODEC_ID_H263:
5355         case CODEC_ID_H263P:
5356         case CODEC_ID_FLV1:
5357             ff_clean_h263_qscales(s);
5358             break;
5359         }
5360
5361         s->lambda= s->lambda_table[0];
5362         //FIXME broken
5363     }else
5364         s->lambda= s->current_picture.quality;
5365 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5366     update_qscale(s);
5367 }
5368
/**
 * Encode one picture: run motion estimation across all worker threads,
 * pick f_code/b_code and clip long vectors, estimate the quantizer,
 * write the picture header and finally run the per-slice encode threads.
 *
 * @param picture_number display/coded picture number, stored in the context
 *                       and passed to the codec-specific header writers
 */
static void encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration

    /* rounding control: I frames reset it, P/S frames toggle it for codecs
       with flipflop rounding (avoids drift accumulation) */
    if(s->pict_type==I_TYPE){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=B_TYPE){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    /* pre-seed the quantizer/lambda so motion estimation below uses a
       sensible value; the final qscale is chosen by estimate_qp() later */
    if(s->flags & CODEC_FLAG_PASS2){
        estimate_qp(s, 1);
        ff_get_2pass_fcode(s);
    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
        RateControlContext *rcc= &s->rc_context;

        if(s->pict_type==B_TYPE)
            s->lambda= rcc->last_qscale_for[s->pict_type];
        else
            s->lambda= rcc->last_qscale_for[rcc->last_non_b_pict_type];
        update_qscale(s);
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    for(i=1; i<s->avctx->thread_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    ff_init_me(s);

    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    }else /* if(s->pict_type == I_TYPE) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
        }
    }
    /* fold each worker's ME statistics back into the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* scene-change detection: re-encode a P frame as I when ME reports a
       score above the user threshold */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* choose f_code/b_code from the estimated vectors and clip vectors that
       do not fit the chosen range (skipped for H.263+ unlimited-MV mode) */
    if(!s->umvplus){
        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
            }

            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==B_TYPE){
            int a, b;

            /* forward code covers both pure-forward and bidir-forward MVs */
            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    estimate_qp(s, 0);

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3) & 0xFF;
        }
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture_ptr->key_frame=
    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
    s->current_picture_ptr->pict_type=
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    /* write the codec-specific picture header and count its bits */
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        mjpeg_picture_header(s);
        break;
#ifdef CONFIG_H261_ENCODER
    case FMT_H261:
        ff_h261_encode_picture_header(s, picture_number);
        break;
#endif
    case FMT_H263:
        if (s->codec_id == CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (s->h263_msmpeg4)
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
#ifdef CONFIG_RV10_ENCODER
        else if (s->codec_id == CODEC_ID_RV10)
            rv10_encode_picture_header(s, picture_number);
#endif
#ifdef CONFIG_RV20_ENCODER
        else if (s->codec_id == CODEC_ID_RV20)
            rv20_encode_picture_header(s, picture_number);
#endif
        else if (s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    /* run the actual per-MB encoding in worker threads and merge results */
    for(i=1; i<s->avctx->thread_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
}
5583
5584 #endif //CONFIG_ENCODERS
5585
5586 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5587     const int intra= s->mb_intra;
5588     int i;
5589
5590     s->dct_count[intra]++;
5591
5592     for(i=0; i<64; i++){
5593         int level= block[i];
5594
5595         if(level){
5596             if(level>0){
5597                 s->dct_error_sum[intra][i] += level;
5598                 level -= s->dct_offset[intra][i];
5599                 if(level<0) level=0;
5600             }else{
5601                 s->dct_error_sum[intra][i] -= level;
5602                 level += s->dct_offset[intra][i];
5603                 if(level>0) level=0;
5604             }
5605             block[i]= level;
5606         }
5607     }
5608 }
5609
5610 #ifdef CONFIG_ENCODERS
5611
/**
 * Rate-distortion optimized ("trellis") quantization of one 8x8 block.
 *
 * Runs the forward DCT, then for each coefficient position considers up to
 * two candidate quantized levels and performs a Viterbi-style search over
 * run/level codings, scoring each path as distortion + bits*lambda.
 *
 * @param block    input samples; on return holds the chosen quantized
 *                 coefficients (in permuted scan order)
 * @param n        block index (0..3 luma, 4+ chroma — selects DC scale)
 * @param qscale   quantizer scale
 * @param overflow set to nonzero when a coefficient exceeded s->max_qcoeff
 * @return index of the last nonzero coefficient, or -1/0-style sentinel as
 *         with the plain quantizer
 */
static int dct_quantize_trellis_c(MpegEncContext *s,
                        DCTELEM *block, int n,
                        int qscale, int *overflow){
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    int run_tab[65];
    int level_tab[65];
    int score_tab[65];       // score_tab[i]: best path cost ending before position i
    int survivor[65];        // positions still worth extending a run from
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;
    int last_i;
    int coeff[2][64];        // candidate levels per position (level and level-1)
    int coeff_count[64];     // number of candidates actually stored (1 or 2)
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);

    s->dsp.fdct (block);

    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    /* intra blocks quantize the DC separately and use the intra matrix and
       intra VLC tables; inter blocks start at coefficient 0 */
    if (s->mb_intra) {
        int q;
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    /* find the last coefficient that quantizes to a nonzero level */
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    /* build the per-position candidate levels (level and level-1, with sign) */
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            coeff_count[i]= FFMIN(level, 2);
            assert(coeff_count[i]);
            max |=level;
        }else{
            /* below threshold: only candidate is +/-1 matching the sign */
            coeff[0][i]= (level>>31)|1;
            coeff_count[i]= 1;
        }
    }

    *overflow= s->max_qcoeff < max; //overflow might have happened

    if(last_non_zero < start_i){
        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
        return last_non_zero;
    }

    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;

    /* Viterbi forward pass: for each position try every candidate level and
       every surviving run start, keeping the cheapest path */
    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j;
        const int dct_coeff= ABS(block[ scantable[i] ]);
        const int zero_distoration= dct_coeff*dct_coeff;
        int best_score=256*256*256*120;
        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distoration;
            int level= coeff[level_index][i];
            const int alevel= ABS(level);
            int unquant_coeff;

            assert(level);

            /* reconstruct the dequantized value so distortion is measured
               against what the decoder will actually see */
            if(s->out_format == FMT_H263){
                unquant_coeff= alevel*qmul + qadd;
            }else{ //MPEG1
                j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
            level+=64;
            if((level&(~127)) == 0){
                /* level fits the VLC tables: use the exact code lengths */
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                    /* H.263 has a distinct "last" VLC table, so the best
                       terminating coefficient is tracked separately */
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                /* out-of-range level: must be coded with the escape code */
                distoration += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }

        score_tab[i+1]= best_score;

        //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
        /* prune survivors that can no longer lead to a cheaper path */
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    /* non-H.263 formats: pick the cheapest truncation point after the pass */
    if(s->out_format != FMT_H263){
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;

    dc= ABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));

    if(last_non_zero < start_i)
        return last_non_zero;

    /* special case: a lone coefficient at position 0 — re-evaluate whether
       coding it at all beats coding nothing */
    if(last_non_zero == 0 && start_i == 0){
        int best_level= 0;
        int best_score= dc * dc;

        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= ABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    /* backtrack from the chosen endpoint, writing the winning levels back
       into the block (block was zeroed above) */
    i= last_i;
    assert(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;

    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
5899
//#define REFINE_STATS 1
/* Scaled DCT basis functions (factor 1<<BASIS_SHIFT), indexed by
 * [permuted coefficient index][8*x + y]; filled lazily by build_basis()
 * on first use in dct_quantize_refine(). */
static int16_t basis[64][64];
5902
5903 static void build_basis(uint8_t *perm){
5904     int i, j, x, y;
5905     emms_c();
5906     for(i=0; i<8; i++){
5907         for(j=0; j<8; j++){
5908             for(y=0; y<8; y++){
5909                 for(x=0; x<8; x++){
5910                     double s= 0.25*(1<<BASIS_SHIFT);
5911                     int index= 8*i + j;
5912                     int perm_index= perm[index];
5913                     if(i==0) s*= sqrt(0.5);
5914                     if(j==0) s*= sqrt(0.5);
5915                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5916                 }
5917             }
5918         }
5919     }
5920 }
5921
5922 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5923                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5924                         int n, int qscale){
5925     int16_t rem[64];
5926     DCTELEM d1[64] __align16;
5927     const int *qmat;
5928     const uint8_t *scantable= s->intra_scantable.scantable;
5929     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5930 //    unsigned int threshold1, threshold2;
5931 //    int bias=0;
5932     int run_tab[65];
5933     int prev_run=0;
5934     int prev_level=0;
5935     int qmul, qadd, start_i, last_non_zero, i, dc;
5936     uint8_t * length;
5937     uint8_t * last_length;
5938     int lambda;
5939     int rle_index, run, q, sum;
5940 #ifdef REFINE_STATS
5941 static int count=0;
5942 static int after_last=0;
5943 static int to_zero=0;
5944 static int from_zero=0;
5945 static int raise=0;
5946 static int lower=0;
5947 static int messed_sign=0;
5948 #endif
5949
5950     if(basis[0][0] == 0)
5951         build_basis(s->dsp.idct_permutation);
5952
5953     qmul= qscale*2;
5954     qadd= (qscale-1)|1;
5955     if (s->mb_intra) {
5956         if (!s->h263_aic) {
5957             if (n < 4)
5958                 q = s->y_dc_scale;
5959             else
5960                 q = s->c_dc_scale;
5961         } else{
5962             /* For AIC we skip quant/dequant of INTRADC */
5963             q = 1;
5964             qadd=0;
5965         }
5966         q <<= RECON_SHIFT-3;
5967         /* note: block[0] is assumed to be positive */
5968         dc= block[0]*q;
5969 //        block[0] = (block[0] + (q >> 1)) / q;
5970         start_i = 1;
5971         qmat = s->q_intra_matrix[qscale];
5972 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5973 //            bias= 1<<(QMAT_SHIFT-1);
5974         length     = s->intra_ac_vlc_length;
5975         last_length= s->intra_ac_vlc_last_length;
5976     } else {
5977         dc= 0;
5978         start_i = 0;
5979         qmat = s->q_inter_matrix[qscale];
5980         length     = s->inter_ac_vlc_length;
5981         last_length= s->inter_ac_vlc_last_length;
5982     }
5983     last_non_zero = s->block_last_index[n];
5984
5985 #ifdef REFINE_STATS
5986 {START_TIMER
5987 #endif
5988     dc += (1<<(RECON_SHIFT-1));
5989     for(i=0; i<64; i++){
5990         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
5991     }
5992 #ifdef REFINE_STATS
5993 STOP_TIMER("memset rem[]")}
5994 #endif
5995     sum=0;
5996     for(i=0; i<64; i++){
5997         int one= 36;
5998         int qns=4;
5999         int w;
6000
6001         w= ABS(weight[i]) + qns*one;
6002         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6003
6004         weight[i] = w;
6005 //        w=weight[i] = (63*qns + (w/2)) / w;
6006
6007         assert(w>0);
6008         assert(w<(1<<6));
6009         sum += w*w;
6010     }
6011     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6012 #ifdef REFINE_STATS
6013 {START_TIMER
6014 #endif
6015     run=0;
6016     rle_index=0;
6017     for(i=start_i; i<=last_non_zero; i++){
6018         int j= perm_scantable[i];
6019         const int level= block[j];
6020         int coeff;
6021
6022         if(level){
6023             if(level<0) coeff= qmul*level - qadd;
6024             else        coeff= qmul*level + qadd;
6025             run_tab[rle_index++]=run;
6026             run=0;
6027
6028             s->dsp.add_8x8basis(rem, basis[j], coeff);
6029         }else{
6030             run++;
6031         }
6032     }
6033 #ifdef REFINE_STATS
6034 if(last_non_zero>0){
6035 STOP_TIMER("init rem[]")
6036 }
6037 }
6038
6039 {START_TIMER
6040 #endif
6041     for(;;){
6042         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6043         int best_coeff=0;
6044         int best_change=0;
6045         int run2, best_unquant_change=0, analyze_gradient;
6046 #ifdef REFINE_STATS
6047 {START_TIMER
6048 #endif
6049         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6050
6051         if(analyze_gradient){
6052 #ifdef REFINE_STATS
6053 {START_TIMER
6054 #endif
6055             for(i=0; i<64; i++){
6056                 int w= weight[i];
6057
6058                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6059             }
6060 #ifdef REFINE_STATS
6061 STOP_TIMER("rem*w*w")}
6062 {START_TIMER
6063 #endif
6064             s->dsp.fdct(d1);
6065 #ifdef REFINE_STATS
6066 STOP_TIMER("dct")}
6067 #endif
6068         }
6069
6070         if(start_i){
6071             const int level= block[0];
6072             int change, old_coeff;
6073
6074             assert(s->mb_intra);
6075
6076             old_coeff= q*level;
6077
6078             for(change=-1; change<=1; change+=2){
6079                 int new_level= level + change;
6080                 int score, new_coeff;
6081
6082                 new_coeff= q*new_level;
6083                 if(new_coeff >= 2048 || new_coeff < 0)
6084                     continue;
6085
6086                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6087                 if(score<best_score){
6088                     best_score= score;
6089                     best_coeff= 0;
6090                     best_change= change;
6091                     best_unquant_change= new_coeff - old_coeff;
6092                 }
6093             }
6094         }
6095
6096         run=0;
6097         rle_index=0;
6098         run2= run_tab[rle_index++];
6099         prev_level=0;
6100         prev_run=0;
6101
6102         for(i=start_i; i<64; i++){
6103             int j= perm_scantable[i];
6104             const int level= block[j];
6105             int change, old_coeff;
6106
6107             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6108                 break;
6109
6110             if(level){
6111                 if(level<0) old_coeff= qmul*level - qadd;
6112                 else        old_coeff= qmul*level + qadd;
6113                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6114             }else{
6115                 old_coeff=0;
6116                 run2--;
6117                 assert(run2>=0 || i >= last_non_zero );
6118             }
6119
6120             for(change=-1; change<=1; change+=2){
6121                 int new_level= level + change;
6122                 int score, new_coeff, unquant_change;
6123
6124                 score=0;
6125                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
6126                    continue;
6127
6128                 if(new_level){
6129                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6130                     else            new_coeff= qmul*new_level + qadd;
6131                     if(new_coeff >= 2048 || new_coeff <= -2048)
6132                         continue;
6133                     //FIXME check for overflow
6134
6135                     if(level){
6136                         if(level < 63 && level > -63){
6137                             if(i < last_non_zero)
6138                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6139                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6140                             else
6141                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6142                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6143                         }
6144                     }else{
6145                         assert(ABS(new_level)==1);
6146
6147                         if(analyze_gradient){
6148                             int g= d1[ scantable[i] ];
6149                             if(g && (g^new_level) >= 0)
6150                                 continue;
6151                         }
6152
6153                         if(i < last_non_zero){
6154                             int next_i= i + run2 + 1;
6155                             int next_level= block[ perm_scantable[next_i] ] + 64;
6156
6157                             if(next_level&(~127))
6158                                 next_level= 0;
6159
6160                             if(next_i < last_non_zero)
6161                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6162                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6163                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6164                             else
6165                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6166                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6167                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6168                         }else{
6169                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6170                             if(prev_level){
6171                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6172                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6173                             }
6174                         }
6175                     }
6176                 }else{
6177                     new_coeff=0;
6178                     assert(ABS(level)==1);
6179
6180                     if(i < last_non_zero){
6181                         int next_i= i + run2 + 1;
6182                         int next_level= block[ perm_scantable[next_i] ] + 64;
6183
6184                         if(next_level&(~127))
6185                             next_level= 0;
6186
6187                         if(next_i < last_non_zero)
6188                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6189                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6190                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6191                         else
6192                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6193                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6194                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6195                     }else{
6196                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6197                         if(prev_level){
6198                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6199                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6200                         }
6201                     }
6202                 }
6203
6204                 score *= lambda;
6205
6206                 unquant_change= new_coeff - old_coeff;
6207                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6208
6209                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6210                 if(score<best_score){
6211                     best_score= score;
6212                     best_coeff= i;
6213                     best_change= change;
6214                     best_unquant_change= unquant_change;
6215                 }
6216             }
6217             if(level){
6218                 prev_level= level + 64;
6219                 if(prev_level&(~127))
6220                     prev_level= 0;
6221                 prev_run= run;
6222                 run=0;
6223             }else{
6224                 run++;
6225             }
6226         }
6227 #ifdef REFINE_STATS
6228 STOP_TIMER("iterative step")}
6229 #endif
6230
6231         if(best_change){
6232             int j= perm_scantable[ best_coeff ];
6233
6234             block[j] += best_change;
6235
6236             if(best_coeff > last_non_zero){
6237                 last_non_zero= best_coeff;
6238                 assert(block[j]);
6239 #ifdef REFINE_STATS
6240 after_last++;
6241 #endif
6242             }else{
6243 #ifdef REFINE_STATS
6244 if(block[j]){
6245     if(block[j] - best_change){
6246         if(ABS(block[j]) > ABS(block[j] - best_change)){
6247             raise++;
6248         }else{
6249             lower++;
6250         }
6251     }else{
6252         from_zero++;
6253     }
6254 }else{
6255     to_zero++;
6256 }
6257 #endif
6258                 for(; last_non_zero>=start_i; last_non_zero--){
6259                     if(block[perm_scantable[last_non_zero]])
6260                         break;
6261                 }
6262             }
6263 #ifdef REFINE_STATS
6264 count++;
6265 if(256*256*256*64 % count == 0){
6266     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6267 }
6268 #endif
6269             run=0;
6270             rle_index=0;
6271             for(i=start_i; i<=last_non_zero; i++){
6272                 int j= perm_scantable[i];
6273                 const int level= block[j];
6274
6275                  if(level){
6276                      run_tab[rle_index++]=run;
6277                      run=0;
6278                  }else{
6279                      run++;
6280                  }
6281             }
6282
6283             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6284         }else{
6285             break;
6286         }
6287     }
6288 #ifdef REFINE_STATS
6289 if(last_non_zero>0){
6290 STOP_TIMER("iterative search")
6291 }
6292 }
6293 #endif
6294
6295     return last_non_zero;
6296 }
6297
static int dct_quantize_c(MpegEncContext *s,
                        DCTELEM *block, int n,
                        int qscale, int *overflow)
{
    /* Forward-DCT and quantize one 8x8 block in place.
     *
     * s        encoder context (DSP functions, precomputed quant matrices)
     * block    64 spatial-domain samples on input; quantized transform
     *          coefficients on output
     * n        block index within the macroblock; n < 4 selects the luma
     *          DC scale, otherwise the chroma DC scale
     * qscale   quantizer scale, used to index the precomputed matrices
     * overflow out: set nonzero when some quantized level exceeds the
     *          codec's maximum representable coefficient (s->max_qcoeff)
     *
     * Returns the scan-order index of the last non-zero coefficient
     * (0 for an intra block with only DC, -1 for an empty inter block).
     */
    int i, j, level, last_non_zero, q, start_i;
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    int bias;
    int max=0;
    unsigned int threshold1, threshold2;

    s->dsp.fdct (block);

    /* optional denoising driven by accumulated DCT error statistics */
    if(s->dct_error_sum)
        s->denoise_dct(s, block);

    if (s->mb_intra) {
        /* intra: DC is quantized separately by rounded division; the AC
         * scan then starts at coefficient 1 */
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    }
    /* a matrix-scaled coefficient quantizes to non-zero iff
     * (unsigned)(level + threshold1) > threshold2, i.e. iff
     * level > threshold1 or level < -threshold1 (single unsigned compare) */
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);
    /* scan backwards to locate the last significant coefficient,
     * clearing the all-zero tail on the way */
    for(i=63;i>=start_i;i--) {
        j = scantable[i];
        level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }else{
            block[j]=0;
        }
    }
    /* quantize the surviving coefficients with bias rounding; OR the
     * magnitudes into 'max' so overflow can be detected afterwards */
    for(i=start_i; i<=last_non_zero; i++) {
        j = scantable[i];
        level = block[j] * qmat[j];

//        if(   bias+level >= (1<<QMAT_SHIFT)
//           || bias-level >= (1<<QMAT_SHIFT)){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                block[j]= level;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                block[j]= -level;
            }
            max |=level;
        }else{
            block[j]=0;
        }
    }
    *overflow= s->max_qcoeff < max; //overflow might have happened

    /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
    if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
        ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);

    return last_non_zero;
}
6377
6378 #endif //CONFIG_ENCODERS
6379
6380 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6381                                    DCTELEM *block, int n, int qscale)
6382 {
6383     int i, level, nCoeffs;
6384     const uint16_t *quant_matrix;
6385
6386     nCoeffs= s->block_last_index[n];
6387
6388     if (n < 4)
6389         block[0] = block[0] * s->y_dc_scale;
6390     else
6391         block[0] = block[0] * s->c_dc_scale;
6392     /* XXX: only mpeg1 */
6393     quant_matrix = s->intra_matrix;
6394     for(i=1;i<=nCoeffs;i++) {
6395         int j= s->intra_scantable.permutated[i];
6396         level = block[j];
6397         if (level) {
6398             if (level < 0) {
6399                 level = -level;
6400                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6401                 level = (level - 1) | 1;
6402                 level = -level;
6403             } else {
6404                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6405                 level = (level - 1) | 1;
6406             }
6407             block[j] = level;
6408         }
6409     }
6410 }
6411
6412 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6413                                    DCTELEM *block, int n, int qscale)
6414 {
6415     int i, level, nCoeffs;
6416     const uint16_t *quant_matrix;
6417
6418     nCoeffs= s->block_last_index[n];
6419
6420     quant_matrix = s->inter_matrix;
6421     for(i=0; i<=nCoeffs; i++) {
6422         int j= s->intra_scantable.permutated[i];
6423         level = block[j];
6424         if (level) {
6425             if (level < 0) {
6426                 level = -level;
6427                 level = (((level << 1) + 1) * qscale *
6428                          ((int) (quant_matrix[j]))) >> 4;
6429                 level = (level - 1) | 1;
6430                 level = -level;
6431             } else {
6432                 level = (((level << 1) + 1) * qscale *
6433                          ((int) (quant_matrix[j]))) >> 4;
6434                 level = (level - 1) | 1;
6435             }
6436             block[j] = level;
6437         }
6438     }
6439 }
6440
6441 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6442                                    DCTELEM *block, int n, int qscale)
6443 {
6444     int i, level, nCoeffs;
6445     const uint16_t *quant_matrix;
6446
6447     if(s->alternate_scan) nCoeffs= 63;
6448     else nCoeffs= s->block_last_index[n];
6449
6450     if (n < 4)
6451         block[0] = block[0] * s->y_dc_scale;
6452     else
6453         block[0] = block[0] * s->c_dc_scale;
6454     quant_matrix = s->intra_matrix;
6455     for(i=1;i<=nCoeffs;i++) {
6456         int j= s->intra_scantable.permutated[i];
6457         level = block[j];
6458         if (level) {
6459             if (level < 0) {
6460                 level = -level;
6461                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6462                 level = -level;
6463             } else {
6464                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6465             }
6466             block[j] = level;
6467         }
6468     }
6469 }
6470
6471 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6472                                    DCTELEM *block, int n, int qscale)
6473 {
6474     int i, level, nCoeffs;
6475     const uint16_t *quant_matrix;
6476     int sum=-1;
6477
6478     if(s->alternate_scan) nCoeffs= 63;
6479     else nCoeffs= s->block_last_index[n];
6480
6481     quant_matrix = s->inter_matrix;
6482     for(i=0; i<=nCoeffs; i++) {
6483         int j= s->intra_scantable.permutated[i];
6484         level = block[j];
6485         if (level) {
6486             if (level < 0) {
6487                 level = -level;
6488                 level = (((level << 1) + 1) * qscale *
6489                          ((int) (quant_matrix[j]))) >> 4;
6490                 level = -level;
6491             } else {
6492                 level = (((level << 1) + 1) * qscale *
6493                          ((int) (quant_matrix[j]))) >> 4;
6494             }
6495             block[j] = level;
6496             sum+=level;
6497         }
6498     }
6499     block[63]^=sum&1;
6500 }
6501
6502 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6503                                   DCTELEM *block, int n, int qscale)
6504 {
6505     int i, level, qmul, qadd;
6506     int nCoeffs;
6507
6508     assert(s->block_last_index[n]>=0);
6509
6510     qmul = qscale << 1;
6511
6512     if (!s->h263_aic) {
6513         if (n < 4)
6514             block[0] = block[0] * s->y_dc_scale;
6515         else
6516             block[0] = block[0] * s->c_dc_scale;
6517         qadd = (qscale - 1) | 1;
6518     }else{
6519         qadd = 0;
6520     }
6521     if(s->ac_pred)
6522         nCoeffs=63;
6523     else
6524         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6525
6526     for(i=1; i<=nCoeffs; i++) {
6527         level = block[i];
6528         if (level) {
6529             if (level < 0) {
6530                 level = level * qmul - qadd;
6531             } else {
6532                 level = level * qmul + qadd;
6533             }
6534             block[i] = level;
6535         }
6536     }
6537 }
6538
6539 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6540                                   DCTELEM *block, int n, int qscale)
6541 {
6542     int i, level, qmul, qadd;
6543     int nCoeffs;
6544
6545     assert(s->block_last_index[n]>=0);
6546
6547     qadd = (qscale - 1) | 1;
6548     qmul = qscale << 1;
6549
6550     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6551
6552     for(i=0; i<=nCoeffs; i++) {
6553         level = block[i];
6554         if (level) {
6555             if (level < 0) {
6556                 level = level * qmul - qadd;
6557             } else {
6558                 level = level * qmul + qadd;
6559             }
6560             block[i] = level;
6561         }
6562     }
6563 }
6564
#ifdef CONFIG_ENCODERS
/* Encoder registrations.  Every entry is a thin configuration of the
 * shared MpegEncContext machinery: the positional fields are the codec
 * name, media type, codec id and private context size, followed by the
 * common MPV_encode_init / MPV_encode_picture / MPV_encode_end
 * callbacks.  All encoders accept planar YUV 4:2:0 input (mjpeg uses
 * the full-range YUVJ420P variant). */
AVCodec h263_encoder = {
    "h263",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec h263p_encoder = {
    "h263p",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263P,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec flv_encoder = {
    "flv",
    CODEC_TYPE_VIDEO,
    CODEC_ID_FLV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec rv10_encoder = {
    "rv10",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV10,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec rv20_encoder = {
    "rv20",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV20,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* CODEC_CAP_DELAY: output may lag behind input (presumably because of
 * B-frame reordering — confirm against the encoder core) */
AVCodec mpeg4_encoder = {
    "mpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG4,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
    .capabilities= CODEC_CAP_DELAY,
};

AVCodec msmpeg4v1_encoder = {
    "msmpeg4v1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec msmpeg4v2_encoder = {
    "msmpeg4v2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* note: the registered name is "msmpeg4", not "msmpeg4v3" */
AVCodec msmpeg4v3_encoder = {
    "msmpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V3,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec wmv1_encoder = {
    "wmv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

AVCodec mjpeg_encoder = {
    "mjpeg",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MJPEG,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
};

#endif //CONFIG_ENCODERS