1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */
27
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
55                                   DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57 #ifdef CONFIG_ENCODERS
58 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63 #endif //CONFIG_ENCODERS
64
65 #ifdef HAVE_XVMC
66 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67 extern void XVMC_field_end(MpegEncContext *s);
68 extern void XVMC_decode_mb(MpegEncContext *s);
69 #endif
70
71 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72
73
74 /* enable all paranoid tests for rounding, overflows, etc... */
75 //#define PARANOID
76
77 //#define DEBUG
78
79
80 /* for jpeg fast DCT */
81 #define CONST_BITS 14
82
83 static const uint16_t aanscales[64] = {
84     /* precomputed values scaled up by 14 bits */
85     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
86     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
87     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
88     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
89     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
90     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
91     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
92     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
93 };
94
95 static const uint8_t h263_chroma_roundtab[16] = {
96 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
97     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
98 };
99
100 static const uint8_t ff_default_chroma_qscale_table[32]={
101 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
102     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
103 };
104
105 #ifdef CONFIG_ENCODERS
106 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
107 static uint8_t default_fcode_tab[MAX_MV*2+1];
108
109 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
110
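/**
 * builds the quantization multiplier tables qmat[]/qmat16[] for every qscale in
 * [qmin, qmax] from the given quant matrix and bias; which variant is computed
 * depends on the selected forward DCT (postscaled or not).
 */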
111 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
112                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
113 {
114     int qscale;
115     int shift=0;
116
117     for(qscale=qmin; qscale<=qmax; qscale++){
118         int i;
119         if (dsp->fdct == ff_jpeg_fdct_islow
120 #ifdef FAAN_POSTSCALE
121             || dsp->fdct == ff_faandct
122 #endif
123             ) {
124             for(i=0;i<64;i++) {
125                 const int j= dsp->idct_permutation[i];
126                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
127                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
128                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
129                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
130
131                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
132                                 (qscale * quant_matrix[j]));
133             }
134         } else if (dsp->fdct == fdct_ifast
135 #ifndef FAAN_POSTSCALE
136                    || dsp->fdct == ff_faandct
137 #endif
138                    ) {
139             for(i=0;i<64;i++) {
140                 const int j= dsp->idct_permutation[i];
141                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
142                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
143                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
144                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
145
146                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
147                                 (aanscales[i] * qscale * quant_matrix[j]));
148             }
149         } else {
150             for(i=0;i<64;i++) {
151                 const int j= dsp->idct_permutation[i];
152                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
153                    So 16           <= qscale * quant_matrix[i]             <= 7905
154                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
155                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
156                 */
157                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
158 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
159                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
160
161                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
162                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
163             }
164         }
165
166         for(i=intra; i<64; i++){
167             int64_t max= 8191;
168             if (dsp->fdct == fdct_ifast
169 #ifndef FAAN_POSTSCALE
170                    || dsp->fdct == ff_faandct
171 #endif
172                    ) {
173                 max= (8191LL*aanscales[i]) >> 14;
174             }
175             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
176                 shift++;
177             }
178         }
179     }
180     if(shift){
181         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
182     }
183 }
184
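/** derives qscale from the current lambda, clips it to [qmin, qmax] and updates lambda2 */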
185 static inline void update_qscale(MpegEncContext *s){
186     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
187     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
188
189     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
190 }
191 #endif //CONFIG_ENCODERS
192
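/**
 * initializes a ScanTable: applies the IDCT permutation to src_scantable and
 * precomputes the raster_end[] lookup (plus the inverse table on PowerPC).
 */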
193 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
194     int i;
195     int end;
196
197     st->scantable= src_scantable;
198
199     for(i=0; i<64; i++){
200         int j;
201         j = src_scantable[i];
202         st->permutated[i] = permutation[j];
203 #ifdef ARCH_POWERPC
204         st->inverse[j] = i;
205 #endif
206     }
207
208     end=-1;
209     for(i=0; i<64; i++){
210         int j;
211         j = st->permutated[i];
212         if(j>end) end=j;
213         st->raster_end[i]= end;
214     }
215 }
216
217 #ifdef CONFIG_ENCODERS
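/** writes a '1' flag followed by the matrix in zigzag order, or a single '0' bit if no matrix is given */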
218 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
219     int i;
220
221     if(matrix){
222         put_bits(pb, 1, 1);
223         for(i=0;i<64;i++) {
224             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
225         }
226     }else
227         put_bits(pb, 1, 0);
228 }
229 #endif //CONFIG_ENCODERS
230
231 /* init common dct for both encoder and decoder */
232 int DCT_common_init(MpegEncContext *s)
233 {
234     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
235     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
236     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
237     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
238     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
239     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
240
241 #ifdef CONFIG_ENCODERS
242     s->dct_quantize= dct_quantize_c;
243     s->denoise_dct= denoise_dct_c;
244 #endif //CONFIG_ENCODERS
245
246 #ifdef HAVE_MMX
247     MPV_common_init_mmx(s);
248 #endif
249 #ifdef ARCH_ALPHA
250     MPV_common_init_axp(s);
251 #endif
252 #ifdef HAVE_MLIB
253     MPV_common_init_mlib(s);
254 #endif
255 #ifdef HAVE_MMI
256     MPV_common_init_mmi(s);
257 #endif
258 #ifdef ARCH_ARMV4L
259     MPV_common_init_armv4l(s);
260 #endif
261 #ifdef ARCH_POWERPC
262     MPV_common_init_ppc(s);
263 #endif
264
265 #ifdef CONFIG_ENCODERS
266     s->fast_dct_quantize= s->dct_quantize;
267
268     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
269         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
270     }
271
272 #endif //CONFIG_ENCODERS
273
274     /* load & permute scantables
275        note: only wmv uses different ones
276     */
277     if(s->alternate_scan){
278         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
279         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
280     }else{
281         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
282         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
283     }
284     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
285     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
286
287     return 0;
288 }
289
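/** makes a shallow copy of src (buffer pointers are shared, not duplicated) and marks dst as FF_BUFFER_TYPE_COPY */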
290 static void copy_picture(Picture *dst, Picture *src){
291     *dst = *src;
292     dst->type= FF_BUFFER_TYPE_COPY;
293 }
294
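/**
 * copies the per-frame attributes (pict_type, quality, pts, interlacing flags, ...)
 * from src to dst; if me_threshold is in use, the mb_type, motion_val and ref_index
 * tables are copied as well.
 */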
295 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
296     int i;
297
298     dst->pict_type              = src->pict_type;
299     dst->quality                = src->quality;
300     dst->coded_picture_number   = src->coded_picture_number;
301     dst->display_picture_number = src->display_picture_number;
302 //    dst->reference              = src->reference;
303     dst->pts                    = src->pts;
304     dst->interlaced_frame       = src->interlaced_frame;
305     dst->top_field_first        = src->top_field_first;
306
307     if(s->avctx->me_threshold){
308         if(!src->motion_val[0])
309             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
310         if(!src->mb_type)
311             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
312         if(!src->ref_index[0])
313             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
314         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
315             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
316             src->motion_subsample_log2, dst->motion_subsample_log2);
317
318         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
319
320         for(i=0; i<2; i++){
321             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
322             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
323
324             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
325                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
326             }
327             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
328                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
329             }
330         }
331     }
332 }
333
334 /**
335  * allocates a Picture
336  * The pixels are allocated/set by calling get_buffer() if shared=0
337  */
338 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
339     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't sig11
340     const int mb_array_size= s->mb_stride*s->mb_height;
341     const int b8_array_size= s->b8_stride*s->mb_height*2;
342     const int b4_array_size= s->b4_stride*s->mb_height*4;
343     int i;
344
345     if(shared){
346         assert(pic->data[0]);
347         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
348         pic->type= FF_BUFFER_TYPE_SHARED;
349     }else{
350         int r;
351
352         assert(!pic->data[0]);
353
354         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
355
356         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
357             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
358             return -1;
359         }
360
361         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
362             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
363             return -1;
364         }
365
366         if(pic->linesize[1] != pic->linesize[2]){
367             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
368             return -1;
369         }
370
371         s->linesize  = pic->linesize[0];
372         s->uvlinesize= pic->linesize[1];
373     }
374
375     if(pic->qscale_table==NULL){
376         if (s->encoding) {
377             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
378             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
379             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
380         }
381
382         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
383         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
384         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
385         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
386         if(s->out_format == FMT_H264){
387             for(i=0; i<2; i++){
388                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
389                 pic->motion_val[i]= pic->motion_val_base[i]+4;
390                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
391             }
392             pic->motion_subsample_log2= 2;
393         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
394             for(i=0; i<2; i++){
395                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
396                 pic->motion_val[i]= pic->motion_val_base[i]+4;
397                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
398             }
399             pic->motion_subsample_log2= 3;
400         }
401         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
402             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
403         }
404         pic->qstride= s->mb_stride;
405         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
406     }
407
408     //it might be nicer if the application would keep track of these but it would require an API change
409     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
410     s->prev_pict_types[0]= s->pict_type;
411     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
412         pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and it's a bit tricky to skip them anyway
413
414     return 0;
415 fail: //for the CHECKED_ALLOCZ macro
416     return -1;
417 }
418
419 /**
420  * deallocates a picture
421  */
422 static void free_picture(MpegEncContext *s, Picture *pic){
423     int i;
424
425     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
426         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
427     }
428
429     av_freep(&pic->mb_var);
430     av_freep(&pic->mc_mb_var);
431     av_freep(&pic->mb_mean);
432     av_freep(&pic->mbskip_table);
433     av_freep(&pic->qscale_table);
434     av_freep(&pic->mb_type_base);
435     av_freep(&pic->dct_coeff);
436     av_freep(&pic->pan_scan);
437     pic->mb_type= NULL;
438     for(i=0; i<2; i++){
439         av_freep(&pic->motion_val_base[i]);
440         av_freep(&pic->ref_index[i]);
441     }
442
443     if(pic->type == FF_BUFFER_TYPE_SHARED){
444         for(i=0; i<4; i++){
445             pic->base[i]=
446             pic->data[i]= NULL;
447         }
448         pic->type= 0;
449     }
450 }
451
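/**
 * allocates the per-thread scratch buffers: edge emulation buffer, ME scratchpad
 * (shared by the rd/b/obmc scratchpads), the encoder-only ME maps and optional
 * DCT error sums, and the DCT block array.
 */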
452 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
453     int i;
454
455     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
456     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
457     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
458
459      //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
460     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
461     s->rd_scratchpad=   s->me.scratchpad;
462     s->b_scratchpad=    s->me.scratchpad;
463     s->obmc_scratchpad= s->me.scratchpad + 16;
464     if (s->encoding) {
465         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
466         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
467         if(s->avctx->noise_reduction){
468             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
469         }
470     }
471     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
472     s->block= s->blocks[0];
473
474     for(i=0;i<12;i++){
475         s->pblocks[i] = (short *)(&s->block[i]);
476     }
477     return 0;
478 fail:
479     return -1; //memory is freed through MPV_common_end()
480 }
481
482 static void free_duplicate_context(MpegEncContext *s){
483     if(s==NULL) return;
484
485     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
486     av_freep(&s->me.scratchpad);
487     s->rd_scratchpad=
488     s->b_scratchpad=
489     s->obmc_scratchpad= NULL;
490
491     av_freep(&s->dct_error_sum);
492     av_freep(&s->me.map);
493     av_freep(&s->me.score_map);
494     av_freep(&s->blocks);
495     s->block= NULL;
496 }
497
498 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
499 #define COPY(a) bak->a= src->a
500     COPY(allocated_edge_emu_buffer);
501     COPY(edge_emu_buffer);
502     COPY(me.scratchpad);
503     COPY(rd_scratchpad);
504     COPY(b_scratchpad);
505     COPY(obmc_scratchpad);
506     COPY(me.map);
507     COPY(me.score_map);
508     COPY(blocks);
509     COPY(block);
510     COPY(start_mb_y);
511     COPY(end_mb_y);
512     COPY(me.map_generation);
513     COPY(pb);
514     COPY(dct_error_sum);
515     COPY(dct_count[0]);
516     COPY(dct_count[1]);
517 #undef COPY
518 }
519
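/**
 * overwrites dst with a copy of src while preserving dst's own buffers
 * (saved and restored via backup_duplicate_context), then refixes the
 * pblocks pointers to dst's block array.
 */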
520 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
521     MpegEncContext bak;
522     int i;
523     //FIXME copy only needed parts
524 //START_TIMER
525     backup_duplicate_context(&bak, dst);
526     memcpy(dst, src, sizeof(MpegEncContext));
527     backup_duplicate_context(dst, &bak);
528     for(i=0;i<12;i++){
529         dst->pblocks[i] = (short *)(&dst->block[i]);
530     }
531 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000 frames on 1GHz with 2 threads
532 }
533
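/** copies the per-picture encoding state (picture type, f/b codes, quantizer, lambda, ...) from src to dst */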
534 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
535 #define COPY(a) dst->a= src->a
536     COPY(pict_type);
537     COPY(current_picture);
538     COPY(f_code);
539     COPY(b_code);
540     COPY(qscale);
541     COPY(lambda);
542     COPY(lambda2);
543     COPY(picture_in_gop_number);
544     COPY(gop_picture_number);
545     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
546     COPY(progressive_frame); //FIXME don't set in encode_header
547     COPY(partitioned_frame); //FIXME don't set in encode_header
548 #undef COPY
549 }
550
551 /**
552  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
553  * the changed fields will not depend upon the prior state of the MpegEncContext.
554  */
555 static void MPV_common_defaults(MpegEncContext *s){
556     s->y_dc_scale_table=
557     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
558     s->chroma_qscale_table= ff_default_chroma_qscale_table;
559     s->progressive_frame= 1;
560     s->progressive_sequence= 1;
561     s->picture_structure= PICT_FRAME;
562
563     s->coded_picture_number = 0;
564     s->picture_number = 0;
565     s->input_picture_number = 0;
566
567     s->picture_in_gop_number = 0;
568
569     s->f_code = 1;
570     s->b_code = 1;
571 }
572
573 /**
574  * sets the given MpegEncContext to defaults for decoding.
575  * the changed fields will not depend upon the prior state of the MpegEncContext.
576  */
577 void MPV_decode_defaults(MpegEncContext *s){
578     MPV_common_defaults(s);
579 }
580
581 /**
582  * sets the given MpegEncContext to defaults for encoding.
583  * the changed fields will not depend upon the prior state of the MpegEncContext.
584  */
585
586 #ifdef CONFIG_ENCODERS
587 static void MPV_encode_defaults(MpegEncContext *s){
588     static int done=0;
589
590     MPV_common_defaults(s);
591
592     if(!done){
593         int i;
594         done=1;
595
596         default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
597         memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
598
599         for(i=-16; i<16; i++){
600             default_fcode_tab[i + MAX_MV]= 1;
601         }
602     }
603     s->me.mv_penalty= default_mv_penalty;
604     s->fcode_tab= default_fcode_tab;
605 }
606 #endif //CONFIG_ENCODERS
607
608 /**
609  * init common structure for both encoder and decoder.
610  * this assumes that some variables like width/height are already set
611  */
612 int MPV_common_init(MpegEncContext *s)
613 {
614     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
615
616     if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
617         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
618         return -1;
619     }
620
621     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
622         return -1;
623
624     dsputil_init(&s->dsp, s->avctx);
625     DCT_common_init(s);
626
627     s->flags= s->avctx->flags;
628     s->flags2= s->avctx->flags2;
629
630     s->mb_width  = (s->width  + 15) / 16;
631     s->mb_height = (s->height + 15) / 16;
632     s->mb_stride = s->mb_width + 1;
633     s->b8_stride = s->mb_width*2 + 1;
634     s->b4_stride = s->mb_width*4 + 1;
635     mb_array_size= s->mb_height * s->mb_stride;
636     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
637
638     /* set chroma shifts */
639     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
640                                                     &(s->chroma_y_shift) );
641
642     /* set default edge pos, will be overridden in decode_header if needed */
643     s->h_edge_pos= s->mb_width*16;
644     s->v_edge_pos= s->mb_height*16;
645
646     s->mb_num = s->mb_width * s->mb_height;
647
648     s->block_wrap[0]=
649     s->block_wrap[1]=
650     s->block_wrap[2]=
651     s->block_wrap[3]= s->b8_stride;
652     s->block_wrap[4]=
653     s->block_wrap[5]= s->mb_stride;
654
655     y_size = s->b8_stride * (2 * s->mb_height + 1);
656     c_size = s->mb_stride * (s->mb_height + 1);
657     yc_size = y_size + 2 * c_size;
658
659     /* convert fourcc to upper case */
660     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
661                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
662                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
663                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
664
665     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
666                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
667                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
668                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
669
670     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
671
672     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
673     for(y=0; y<s->mb_height; y++){
674         for(x=0; x<s->mb_width; x++){
675             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
676         }
677     }
678     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
679
680     if (s->encoding) {
681         /* Allocate MV tables */
682         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
683         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
684         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
685         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
686         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
687         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
688         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
689         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
690         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
691         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
692         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
693         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
694
695         if(s->msmpeg4_version){
696             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
697         }
698         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
699
700         /* Allocate MB type table */
701         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
702
703         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
704
705         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
706         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
707         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
708         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
709         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
710         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
711
712         if(s->avctx->noise_reduction){
713             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
714         }
715     }
716     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
717
718     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
719
720     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
721         /* interlaced direct mode decoding tables */
722             for(i=0; i<2; i++){
723                 int j, k;
724                 for(j=0; j<2; j++){
725                     for(k=0; k<2; k++){
726                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
727                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
728                     }
729                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
730                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
731                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
732                 }
733                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
734             }
735     }
736     if (s->out_format == FMT_H263) {
737         /* ac values */
738         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
739         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
740         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
741         s->ac_val[2] = s->ac_val[1] + c_size;
742
743         /* cbp values */
744         CHECKED_ALLOCZ(s->coded_block_base, y_size);
745         s->coded_block= s->coded_block_base + s->b8_stride + 1;
746
747         /* cbp, ac_pred, pred_dir */
748         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
749         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
750     }
751
752     if (s->h263_pred || s->h263_plus || !s->encoding) {
753         /* dc values */
754         //MN: we need these for error resilience of intra-frames
755         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
756         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
757         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
758         s->dc_val[2] = s->dc_val[1] + c_size;
759         for(i=0;i<yc_size;i++)
760             s->dc_val_base[i] = 1024;
761     }
762
763     /* which mb is an intra block */
764     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
765     memset(s->mbintra_table, 1, mb_array_size);
766
767     /* init macroblock skip table */
768     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
769     //Note the +1 is for a quicker mpeg4 slice_end detection
770     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
771
772     s->parse_context.state= -1;
773     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
774        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
775        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
776        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
777     }
778
779     s->context_initialized = 1;
780
781     s->thread_context[0]= s;
782     for(i=1; i<s->avctx->thread_count; i++){
783         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
784         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
785     }
786
787     for(i=0; i<s->avctx->thread_count; i++){
788         if(init_duplicate_context(s->thread_context[i], s) < 0)
789            goto fail;
790         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
791         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
792     }
793
794     return 0;
795  fail:
796     MPV_common_end(s);
797     return -1;
798 }
799
800 /* init common structure for both encoder and decoder */
801 void MPV_common_end(MpegEncContext *s)
802 {
803     int i, j, k;
804
805     for(i=0; i<s->avctx->thread_count; i++){
806         free_duplicate_context(s->thread_context[i]);
807     }
808     for(i=1; i<s->avctx->thread_count; i++){
809         av_freep(&s->thread_context[i]);
810     }
811
812     av_freep(&s->parse_context.buffer);
813     s->parse_context.buffer_size=0;
814
815     av_freep(&s->mb_type);
816     av_freep(&s->p_mv_table_base);
817     av_freep(&s->b_forw_mv_table_base);
818     av_freep(&s->b_back_mv_table_base);
819     av_freep(&s->b_bidir_forw_mv_table_base);
820     av_freep(&s->b_bidir_back_mv_table_base);
821     av_freep(&s->b_direct_mv_table_base);
822     s->p_mv_table= NULL;
823     s->b_forw_mv_table= NULL;
824     s->b_back_mv_table= NULL;
825     s->b_bidir_forw_mv_table= NULL;
826     s->b_bidir_back_mv_table= NULL;
827     s->b_direct_mv_table= NULL;
828     for(i=0; i<2; i++){
829         for(j=0; j<2; j++){
830             for(k=0; k<2; k++){
831                 av_freep(&s->b_field_mv_table_base[i][j][k]);
832                 s->b_field_mv_table[i][j][k]=NULL;
833             }
834             av_freep(&s->b_field_select_table[i][j]);
835             av_freep(&s->p_field_mv_table_base[i][j]);
836             s->p_field_mv_table[i][j]=NULL;
837         }
838         av_freep(&s->p_field_select_table[i]);
839     }
840
841     av_freep(&s->dc_val_base);
842     av_freep(&s->ac_val_base);
843     av_freep(&s->coded_block_base);
844     av_freep(&s->mbintra_table);
845     av_freep(&s->cbp_table);
846     av_freep(&s->pred_dir_table);
847
848     av_freep(&s->mbskip_table);
849     av_freep(&s->prev_pict_types);
850     av_freep(&s->bitstream_buffer);
851     s->allocated_bitstream_buffer_size=0;
852
853     av_freep(&s->avctx->stats_out);
854     av_freep(&s->ac_stats);
855     av_freep(&s->error_status_table);
856     av_freep(&s->mb_index2xy);
857     av_freep(&s->lambda_table);
858     av_freep(&s->q_intra_matrix);
859     av_freep(&s->q_inter_matrix);
860     av_freep(&s->q_intra_matrix16);
861     av_freep(&s->q_inter_matrix16);
862     av_freep(&s->input_picture);
863     av_freep(&s->reordered_input_picture);
864     av_freep(&s->dct_offset);
865
866     if(s->picture){
867         for(i=0; i<MAX_PICTURE_COUNT; i++){
868             free_picture(s, &s->picture[i]);
869         }
870     }
871     av_freep(&s->picture);
872     s->context_initialized = 0;
873     s->last_picture_ptr=
874     s->next_picture_ptr=
875     s->current_picture_ptr= NULL;
876     s->linesize= s->uvlinesize= 0;
877
878     for(i=0; i<3; i++)
879         av_freep(&s->visualization_buffer[i]);
880
881     avcodec_default_free_buffers(s->avctx);
882 }
883
884 #ifdef CONFIG_ENCODERS
885
886 /* init video encoder */
887 int MPV_encode_init(AVCodecContext *avctx)
888 {
889     MpegEncContext *s = avctx->priv_data;
890     int i;
891     int chroma_h_shift, chroma_v_shift;
892
893     MPV_encode_defaults(s);
894
895     if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
896         av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
897         return -1;
898     }
899
900     if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
901         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){
902             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
903             return -1;
904         }
905     }else{
906         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){
907             av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
908             return -1;
909         }
910     }
911
912     s->bit_rate = avctx->bit_rate;
913     s->width = avctx->width;
914     s->height = avctx->height;
915     if(avctx->gop_size > 600){
916         av_log(avctx, AV_LOG_ERROR, "Warning, keyframe interval too large! Reducing it ...\n");
917         avctx->gop_size=600;
918     }
919     s->gop_size = avctx->gop_size;
920     s->avctx = avctx;
921     s->flags= avctx->flags;
922     s->flags2= avctx->flags2;
923     s->max_b_frames= avctx->max_b_frames;
924     s->codec_id= avctx->codec->id;
925     s->luma_elim_threshold  = avctx->luma_elim_threshold;
926     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
927     s->strict_std_compliance= avctx->strict_std_compliance;
928     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
929     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
930     s->mpeg_quant= avctx->mpeg_quant;
931     s->rtp_mode= !!avctx->rtp_payload_size;
932     s->intra_dc_precision= avctx->intra_dc_precision;
933     s->user_specified_pts = AV_NOPTS_VALUE;
934
935     if (s->gop_size <= 1) {
936         s->intra_only = 1;
937         s->gop_size = 12;
938     } else {
939         s->intra_only = 0;
940     }
941
942     s->me_method = avctx->me_method;
943
944     /* Fixed QSCALE */
945     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
946
947     s->adaptive_quant= (   s->avctx->lumi_masking
948                         || s->avctx->dark_masking
949                         || s->avctx->temporal_cplx_masking
950                         || s->avctx->spatial_cplx_masking
951                         || s->avctx->p_masking
952                         || s->avctx->border_masking
953                         || (s->flags&CODEC_FLAG_QP_RD))
954                        && !s->fixed_qscale;
955
956     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
957     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
958     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
959
960     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
961         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed for encoding with a maximum bitrate\n");
962         return -1;
963     }
964
965     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
966         av_log(avctx, AV_LOG_INFO, "Warning, min_rate > 0 but min_rate != max_rate isn't recommended!\n");
967     }
968
969     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
970         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
971         return -1;
972     }
973
974     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
975         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
976         return -1;
977     }
978
979     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
980        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
981        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
982
983         av_log(avctx, AV_LOG_INFO, "Warning, vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
984     }
985
986     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
987        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
988         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
989         return -1;
990     }
991
992     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
993         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
994         return -1;
995     }
996
997     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
998         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
999         return -1;
1000     }
1001
1002     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1003         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1004         return -1;
1005     }
1006
1007     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1008         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1009         return -1;
1010     }
1011
1012     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1013         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1014         return -1;
1015     }
1016
1017     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1018        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1019         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1020         return -1;
1021     }
1022
1023     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1024         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1025         return -1;
1026     }
1027
1028     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1029         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1030         return -1;
1031     }
1032
1033     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1034         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1035         return -1;
1036     }
1037
1038     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1039         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection isn't supported yet\n");
1040         return -1;
1041     }
1042
1043     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1044        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1045        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1046         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1047         return -1;
1048     }
1049
1050     if(s->avctx->thread_count > 1)
1051         s->rtp_mode= 1;
1052
1053     if(!avctx->time_base.den || !avctx->time_base.num){
1054         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1055         return -1;
1056     }
1057
1058     i= (INT_MAX/2+128)>>8;
1059     if(avctx->me_threshold >= i){
1060         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1061         return -1;
1062     }
1063     if(avctx->mb_threshold >= i){
1064         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1065         return -1;
1066     }
1067
1068     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1069         av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass\n");
1070         return -1;
1071     }
1072
1073     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1074     if(i > 1){
1075         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1076         avctx->time_base.den /= i;
1077         avctx->time_base.num /= i;
1078 //        return -1;
1079     }
1080
1081     if(s->codec_id==CODEC_ID_MJPEG){
1082         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1083         s->inter_quant_bias= 0;
1084     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1085         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1086         s->inter_quant_bias= 0;
1087     }else{
1088         s->intra_quant_bias=0;
1089         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1090     }
1091
1092     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1093         s->intra_quant_bias= avctx->intra_quant_bias;
1094     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1095         s->inter_quant_bias= avctx->inter_quant_bias;
1096
1097     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1098
1099     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1100         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1101         return -1;
1102     }
1103     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1104
1105     switch(avctx->codec->id) {
1106     case CODEC_ID_MPEG1VIDEO:
1107         s->out_format = FMT_MPEG1;
1108         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1109         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1110         break;
1111     case CODEC_ID_MPEG2VIDEO:
1112         s->out_format = FMT_MPEG1;
1113         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1114         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1115         s->rtp_mode= 1;
1116         break;
1117     case CODEC_ID_LJPEG:
1118     case CODEC_ID_JPEGLS:
1119     case CODEC_ID_MJPEG:
1120         s->out_format = FMT_MJPEG;
1121         s->intra_only = 1; /* force intra only for jpeg */
1122         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1123         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1124         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1125         s->mjpeg_vsample[1] = 1;
1126         s->mjpeg_vsample[2] = 1;
1127         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1128         s->mjpeg_hsample[1] = 1;
1129         s->mjpeg_hsample[2] = 1;
1130         if (mjpeg_init(s) < 0)
1131             return -1;
1132         avctx->delay=0;
1133         s->low_delay=1;
1134         break;
1135     case CODEC_ID_H261:
1136         s->out_format = FMT_H261;
1137         avctx->delay=0;
1138         s->low_delay=1;
1139         break;
1140     case CODEC_ID_H263:
1141         if (h263_get_picture_format(s->width, s->height) == 7) {
1142             av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
1143             return -1;
1144         }
1145         s->out_format = FMT_H263;
1146         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1147         avctx->delay=0;
1148         s->low_delay=1;
1149         break;
1150     case CODEC_ID_H263P:
1151         s->out_format = FMT_H263;
1152         s->h263_plus = 1;
1153         /* Fx */
1154         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1155         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1156         s->modified_quant= s->h263_aic;
1157         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1158         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1159         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1160         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1161         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1162
1163         /* /Fx */
1164         /* These are just to be sure */
1165         avctx->delay=0;
1166         s->low_delay=1;
1167         break;
1168     case CODEC_ID_FLV1:
1169         s->out_format = FMT_H263;
1170         s->h263_flv = 2; /* format = 1; 11-bit codes */
1171         s->unrestricted_mv = 1;
1172         s->rtp_mode=0; /* don't allow GOB */
1173         avctx->delay=0;
1174         s->low_delay=1;
1175         break;
1176     case CODEC_ID_RV10:
1177         s->out_format = FMT_H263;
1178         avctx->delay=0;
1179         s->low_delay=1;
1180         break;
1181     case CODEC_ID_RV20:
1182         s->out_format = FMT_H263;
1183         avctx->delay=0;
1184         s->low_delay=1;
1185         s->modified_quant=1;
1186         s->h263_aic=1;
1187         s->h263_plus=1;
1188         s->loop_filter=1;
1189         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1190         break;
1191     case CODEC_ID_MPEG4:
1192         s->out_format = FMT_H263;
1193         s->h263_pred = 1;
1194         s->unrestricted_mv = 1;
1195         s->low_delay= s->max_b_frames ? 0 : 1;
1196         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1197         break;
1198     case CODEC_ID_MSMPEG4V1:
1199         s->out_format = FMT_H263;
1200         s->h263_msmpeg4 = 1;
1201         s->h263_pred = 1;
1202         s->unrestricted_mv = 1;
1203         s->msmpeg4_version= 1;
1204         avctx->delay=0;
1205         s->low_delay=1;
1206         break;
1207     case CODEC_ID_MSMPEG4V2:
1208         s->out_format = FMT_H263;
1209         s->h263_msmpeg4 = 1;
1210         s->h263_pred = 1;
1211         s->unrestricted_mv = 1;
1212         s->msmpeg4_version= 2;
1213         avctx->delay=0;
1214         s->low_delay=1;
1215         break;
1216     case CODEC_ID_MSMPEG4V3:
1217         s->out_format = FMT_H263;
1218         s->h263_msmpeg4 = 1;
1219         s->h263_pred = 1;
1220         s->unrestricted_mv = 1;
1221         s->msmpeg4_version= 3;
1222         s->flipflop_rounding=1;
1223         avctx->delay=0;
1224         s->low_delay=1;
1225         break;
1226     case CODEC_ID_WMV1:
1227         s->out_format = FMT_H263;
1228         s->h263_msmpeg4 = 1;
1229         s->h263_pred = 1;
1230         s->unrestricted_mv = 1;
1231         s->msmpeg4_version= 4;
1232         s->flipflop_rounding=1;
1233         avctx->delay=0;
1234         s->low_delay=1;
1235         break;
1236     case CODEC_ID_WMV2:
1237         s->out_format = FMT_H263;
1238         s->h263_msmpeg4 = 1;
1239         s->h263_pred = 1;
1240         s->unrestricted_mv = 1;
1241         s->msmpeg4_version= 5;
1242         s->flipflop_rounding=1;
1243         avctx->delay=0;
1244         s->low_delay=1;
1245         break;
1246     default:
1247         return -1;
1248     }
1249
1250     avctx->has_b_frames= !s->low_delay;
1251
1252     s->encoding = 1;
1253
1254     /* init */
1255     if (MPV_common_init(s) < 0)
1256         return -1;
1257
1258     if(s->modified_quant)
1259         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1260     s->progressive_frame=
1261     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1262     s->quant_precision=5;
1263
1264     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1265     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1266
1267 #ifdef CONFIG_H261_ENCODER
1268     if (s->out_format == FMT_H261)
1269         ff_h261_encode_init(s);
1270 #endif
1271     if (s->out_format == FMT_H263)
1272         h263_encode_init(s);
1273     if(s->msmpeg4_version)
1274         ff_msmpeg4_encode_init(s);
1275     if (s->out_format == FMT_MPEG1)
1276         ff_mpeg1_encode_init(s);
1277
1278     /* init q matrix */
1279     for(i=0;i<64;i++) {
1280         int j= s->dsp.idct_permutation[i];
1281         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1282             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1283             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1284         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1285             s->intra_matrix[j] =
1286             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1287         }else
1288         { /* mpeg1/2 */
1289             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1290             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1291         }
1292         if(s->avctx->intra_matrix)
1293             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1294         if(s->avctx->inter_matrix)
1295             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1296     }
1297
1298     /* precompute matrix */
1299     /* for mjpeg, we do include qscale in the matrix */
1300     if (s->out_format != FMT_MJPEG) {
1301         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1302                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1303         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1304                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1305     }
1306
1307     if(ff_rate_control_init(s) < 0)
1308         return -1;
1309
1310     return 0;
1311 }
1312
1313 int MPV_encode_end(AVCodecContext *avctx)
1314 {
1315     MpegEncContext *s = avctx->priv_data;
1316
1317 #ifdef STATS
1318     print_stats();
1319 #endif
1320
1321     ff_rate_control_uninit(s);
1322
1323     MPV_common_end(s);
1324     if (s->out_format == FMT_MJPEG)
1325         mjpeg_close(s);
1326
1327     av_freep(&avctx->extradata);
1328
1329     return 0;
1330 }
1331
1332 #endif //CONFIG_ENCODERS
1333
1334 void init_rl(RLTable *rl, int use_static)
1335 {
1336     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1337     uint8_t index_run[MAX_RUN+1];
1338     int last, run, level, start, end, i;
1339
1340     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1341     if(use_static && rl->max_level[0])
1342         return;
1343
1344     /* compute max_level[], max_run[] and index_run[] */
1345     for(last=0;last<2;last++) {
1346         if (last == 0) {
1347             start = 0;
1348             end = rl->last;
1349         } else {
1350             start = rl->last;
1351             end = rl->n;
1352         }
1353
1354         memset(max_level, 0, MAX_RUN + 1);
1355         memset(max_run, 0, MAX_LEVEL + 1);
1356         memset(index_run, rl->n, MAX_RUN + 1);
1357         for(i=start;i<end;i++) {
1358             run = rl->table_run[i];
1359             level = rl->table_level[i];
1360             if (index_run[run] == rl->n)
1361                 index_run[run] = i;
1362             if (level > max_level[run])
1363                 max_level[run] = level;
1364             if (run > max_run[level])
1365                 max_run[level] = run;
1366         }
1367         if(use_static)
1368             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1369         else
1370             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1371         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1372         if(use_static)
1373             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1374         else
1375             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1376         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1377         if(use_static)
1378             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1379         else
1380             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1381         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1382     }
1383 }
1384
1385 /* draw the edges of width 'w' of an image of size width, height */
1386 //FIXME check that this is ok for mpeg4 interlaced
1387 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1388 {
1389     uint8_t *ptr, *last_line;
1390     int i;
1391
1392     last_line = buf + (height - 1) * wrap;
1393     for(i=0;i<w;i++) {
1394         /* top and bottom */
1395         memcpy(buf - (i + 1) * wrap, buf, width);
1396         memcpy(last_line + (i + 1) * wrap, last_line, width);
1397     }
1398     /* left and right */
1399     ptr = buf;
1400     for(i=0;i<height;i++) {
1401         memset(ptr - w, ptr[0], w);
1402         memset(ptr + width, ptr[width-1], w);
1403         ptr += wrap;
1404     }
1405     /* corners */
1406     for(i=0;i<w;i++) {
1407         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1408         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1409         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
1410         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
1411     }
1412 }
1413
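/**
 * returns the index of an unused entry in s->picture[];
 * for shared pictures only completely untyped slots are considered.
 */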
1414 int ff_find_unused_picture(MpegEncContext *s, int shared){
1415     int i;
1416
1417     if(shared){
1418         for(i=0; i<MAX_PICTURE_COUNT; i++){
1419             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1420         }
1421     }else{
1422         for(i=0; i<MAX_PICTURE_COUNT; i++){
1423             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1424         }
1425         for(i=0; i<MAX_PICTURE_COUNT; i++){
1426             if(s->picture[i].data[0]==NULL) return i;
1427         }
1428     }
1429
1430     assert(0);
1431     return -1;
1432 }
1433
1434 static void update_noise_reduction(MpegEncContext *s){
1435     int intra, i;
1436
1437     for(intra=0; intra<2; intra++){
1438         if(s->dct_count[intra] > (1<<16)){
1439             for(i=0; i<64; i++){
1440                 s->dct_error_sum[intra][i] >>=1;
1441             }
1442             s->dct_count[intra] >>= 1;
1443         }
1444
1445         for(i=0; i<64; i++){
1446             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1447         }
1448     }
1449 }
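/* Up to rounding, update_noise_reduction() sets
 *     dct_offset[i] = noise_reduction * dct_count / dct_error_sum[i],
 * so coefficients whose accumulated statistics are small relative to how often
 * they were observed get a large offset (and are pulled towards zero by the
 * DCT denoiser), while consistently large coefficients are barely touched.
 * Halving both counters once dct_count exceeds 2^16 acts as a slow exponential
 * decay of old statistics. */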
1450
1451 /**
1452  * generic function for encode/decode, called after coding/decoding the header and before a frame is coded/decoded
1453  */
1454 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1455 {
1456     int i;
1457     AVFrame *pic;
1458     s->mb_skipped = 0;
1459
1460     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1461
1462     /* mark&release old frames */
1463     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1464         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1465
1466         /* release forgotten pictures */
1467         /* if(mpeg124/h263) */
1468         if(!s->encoding){
1469             for(i=0; i<MAX_PICTURE_COUNT; i++){
1470                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1471                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1472                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1473                 }
1474             }
1475         }
1476     }
1477 alloc:
1478     if(!s->encoding){
1479         /* release non-reference frames */
1480         for(i=0; i<MAX_PICTURE_COUNT; i++){
1481             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1482                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1483             }
1484         }
1485
1486         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1487             pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
1488         else{
1489             i= ff_find_unused_picture(s, 0);
1490             pic= (AVFrame*)&s->picture[i];
1491         }
1492
1493         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1494                         && !s->dropable ? 3 : 0;
1495
1496         pic->coded_picture_number= s->coded_picture_number++;
1497
1498         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1499             return -1;
1500
1501         s->current_picture_ptr= (Picture*)pic;
1502         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1503         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1504     }
1505
1506     s->current_picture_ptr->pict_type= s->pict_type;
1507 //    if(s->flags && CODEC_FLAG_QSCALE)
1508   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1509     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1510
1511     copy_picture(&s->current_picture, s->current_picture_ptr);
1512
1513   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1514     if (s->pict_type != B_TYPE) {
1515         s->last_picture_ptr= s->next_picture_ptr;
1516         if(!s->dropable)
1517             s->next_picture_ptr= s->current_picture_ptr;
1518     }
1519 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1520         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1521         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1522         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1523         s->pict_type, s->dropable);*/
1524
1525     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1526     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1527
1528     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1529         av_log(avctx, AV_LOG_ERROR, "warning: first frame is not a keyframe\n");
1530         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1531         goto alloc;
1532     }
1533
1534     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1535
1536     if(s->picture_structure!=PICT_FRAME){
1537         int i;
1538         for(i=0; i<4; i++){
1539             if(s->picture_structure == PICT_BOTTOM_FIELD){
1540                  s->current_picture.data[i] += s->current_picture.linesize[i];
1541             }
1542             s->current_picture.linesize[i] *= 2;
1543             s->last_picture.linesize[i] *=2;
1544             s->next_picture.linesize[i] *=2;
1545         }
1546     }
1547   }
1548
1549     s->hurry_up= s->avctx->hurry_up;
1550     s->error_resilience= avctx->error_resilience;
1551
1552     /* set the dequantizer; we can't do it during init as it might change for mpeg4,
1553        and we can't do it in the header decode as init isn't called for mpeg4 there yet */
1554     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1555         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1556         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1557     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1558         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1559         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1560     }else{
1561         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1562         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1563     }
1564
1565     if(s->dct_error_sum){
1566         assert(s->avctx->noise_reduction && s->encoding);
1567
1568         update_noise_reduction(s);
1569     }
1570
1571 #ifdef HAVE_XVMC
1572     if(s->avctx->xvmc_acceleration)
1573         return XVMC_field_start(s, avctx);
1574 #endif
1575     return 0;
1576 }
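/* A minimal decode-side usage sketch; decode_frame_body() is a hypothetical
 * stand-in for the codec specific macroblock loop, not a function of this file: */
#if 0
    if (MPV_frame_start(s, avctx) < 0)
        return -1;
    decode_frame_body(s);   /* hypothetical: parse and reconstruct all macroblocks */
    MPV_frame_end(s);
    /* the reconstructed frame is now in s->current_picture_ptr */
#endif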
1577
1578 /* generic function for encode/decode, called after a frame has been coded/decoded */
1579 void MPV_frame_end(MpegEncContext *s)
1580 {
1581     int i;
1582     /* draw the edges for correct motion prediction if MVs point outside the picture */
1583 #ifdef HAVE_XVMC
1584 //just to make sure that all data is rendered.
1585     if(s->avctx->xvmc_acceleration){
1586         XVMC_field_end(s);
1587     }else
1588 #endif
1589     if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1590             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1591             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1592             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1593     }
1594     emms_c();
1595
1596     s->last_pict_type    = s->pict_type;
1597     if(s->pict_type!=B_TYPE){
1598         s->last_non_b_pict_type= s->pict_type;
1599     }
1600 #if 0
1601         /* copy back current_picture variables */
1602     for(i=0; i<MAX_PICTURE_COUNT; i++){
1603         if(s->picture[i].data[0] == s->current_picture.data[0]){
1604             s->picture[i]= s->current_picture;
1605             break;
1606         }
1607     }
1608     assert(i<MAX_PICTURE_COUNT);
1609 #endif
1610
1611     if(s->encoding){
1612         /* release non-reference frames */
1613         for(i=0; i<MAX_PICTURE_COUNT; i++){
1614             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1615                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1616             }
1617         }
1618     }
1619     // clear copies, to avoid confusion
1620 #if 0
1621     memset(&s->last_picture, 0, sizeof(Picture));
1622     memset(&s->next_picture, 0, sizeof(Picture));
1623     memset(&s->current_picture, 0, sizeof(Picture));
1624 #endif
1625     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1626 }
1627
1628 /**
1629  * draws a line from (ex, ey) to (sx, sy).
1630  * @param w width of the image
1631  * @param h height of the image
1632  * @param stride stride/linesize of the image
1633  * @param color color of the line
1634  */
1635 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1636     int t, x, y, fr, f;
1637
1638     sx= clip(sx, 0, w-1);
1639     sy= clip(sy, 0, h-1);
1640     ex= clip(ex, 0, w-1);
1641     ey= clip(ey, 0, h-1);
1642
1643     buf[sy*stride + sx]+= color;
1644
1645     if(ABS(ex - sx) > ABS(ey - sy)){
1646         if(sx > ex){
1647             t=sx; sx=ex; ex=t;
1648             t=sy; sy=ey; ey=t;
1649         }
1650         buf+= sx + sy*stride;
1651         ex-= sx;
1652         f= ((ey-sy)<<16)/ex;
1653         for(x= 0; x <= ex; x++){
1654             y = (x*f)>>16;
1655             fr= (x*f)&0xFFFF;
1656             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1657             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1658         }
1659     }else{
1660         if(sy > ey){
1661             t=sx; sx=ex; ex=t;
1662             t=sy; sy=ey; ey=t;
1663         }
1664         buf+= sx + sy*stride;
1665         ey-= sy;
1666         if(ey) f= ((ex-sx)<<16)/ey;
1667         else   f= 0;
1668         for(y= 0; y <= ey; y++){
1669             x = (y*f)>>16;
1670             fr= (y*f)&0xFFFF;
1671             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1672             buf[y*stride + x+1]+= (color*         fr )>>16;
1673         }
1674     }
1675 }
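/* The line is stepped one pixel at a time along its major axis while the
 * minor coordinate is tracked in 16.16 fixed point (f is the slope scaled by
 * 1<<16).  The fractional part fr splits the colour between the two nearest
 * pixels, e.g. fr == 0x8000 adds color/2 to each of them, which gives a cheap
 * anti-aliased line. */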
1676
1677 /**
1678  * draws an arrow from (ex, ey) -> (sx, sy).
1679  * @param w width of the image
1680  * @param h height of the image
1681  * @param stride stride/linesize of the image
1682  * @param color color of the arrow
1683  */
1684 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1685     int dx,dy;
1686
1687     sx= clip(sx, -100, w+100);
1688     sy= clip(sy, -100, h+100);
1689     ex= clip(ex, -100, w+100);
1690     ey= clip(ey, -100, h+100);
1691
1692     dx= ex - sx;
1693     dy= ey - sy;
1694
1695     if(dx*dx + dy*dy > 3*3){
1696         int rx=  dx + dy;
1697         int ry= -dx + dy;
1698         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1699
1700         //FIXME subpixel accuracy
1701         rx= ROUNDED_DIV(rx*3<<4, length);
1702         ry= ROUNDED_DIV(ry*3<<4, length);
1703
1704         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1705         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1706     }
1707     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1708 }
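/* (rx, ry) = (dx+dy, -dx+dy) is the arrow direction rotated by 45 degrees (up
 * to scale) and (-ry, rx) is that vector rotated by a further 90 degrees;
 * scaling both to a fixed length and drawing them from (sx, sy) produces the
 * two strokes of the arrow head, so the head marks the (sx, sy) end of the
 * vector. */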
1709
1710 /**
1711  * prints debugging info for the given picture.
1712  */
1713 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1714
1715     if(!pict || !pict->mb_type) return;
1716
1717     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1718         int x,y;
1719
1720         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1721         switch (pict->pict_type) {
1722             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1723             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1724             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1725             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1726             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1727             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1728         }
1729         for(y=0; y<s->mb_height; y++){
1730             for(x=0; x<s->mb_width; x++){
1731                 if(s->avctx->debug&FF_DEBUG_SKIP){
1732                     int count= s->mbskip_table[x + y*s->mb_stride];
1733                     if(count>9) count=9;
1734                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1735                 }
1736                 if(s->avctx->debug&FF_DEBUG_QP){
1737                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1738                 }
1739                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1740                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1741                     //Type & MV direction
1742                     if(IS_PCM(mb_type))
1743                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1744                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1745                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1746                     else if(IS_INTRA4x4(mb_type))
1747                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1748                     else if(IS_INTRA16x16(mb_type))
1749                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1750                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1751                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1752                     else if(IS_DIRECT(mb_type))
1753                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1754                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1755                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1756                     else if(IS_GMC(mb_type))
1757                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1758                     else if(IS_SKIP(mb_type))
1759                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1760                     else if(!USES_LIST(mb_type, 1))
1761                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1762                     else if(!USES_LIST(mb_type, 0))
1763                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1764                     else{
1765                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1766                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1767                     }
1768
1769                     //segmentation
1770                     if(IS_8X8(mb_type))
1771                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1772                     else if(IS_16X8(mb_type))
1773                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1774                     else if(IS_8X16(mb_type))
1775                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1776                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1777                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1778                     else
1779                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1780
1781
1782                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1783                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1784                     else
1785                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1786                 }
1787 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1788             }
1789             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1790         }
1791     }
1792
1793     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1794         const int shift= 1 + s->quarter_sample;
1795         int mb_y;
1796         uint8_t *ptr;
1797         int i;
1798         int h_chroma_shift, v_chroma_shift;
1799         const int width = s->avctx->width;
1800         const int height= s->avctx->height;
1801         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1802         const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
1803         s->low_delay=0; //needed to see the vectors without trashing the buffers
1804
1805         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1806         for(i=0; i<3; i++){
1807             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1808             pict->data[i]= s->visualization_buffer[i];
1809         }
1810         pict->type= FF_BUFFER_TYPE_COPY;
1811         ptr= pict->data[0];
1812
1813         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1814             int mb_x;
1815             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1816                 const int mb_index= mb_x + mb_y*s->mb_stride;
1817                 if((s->avctx->debug_mv) && pict->motion_val){
1818                   int type;
1819                   for(type=0; type<3; type++){
1820                     int direction = 0;
1821                     switch (type) {
1822                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1823                                 continue;
1824                               direction = 0;
1825                               break;
1826                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1827                                 continue;
1828                               direction = 0;
1829                               break;
1830                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1831                                 continue;
1832                               direction = 1;
1833                               break;
1834                     }
1835                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1836                         continue;
1837
1838                     if(IS_8X8(pict->mb_type[mb_index])){
1839                       int i;
1840                       for(i=0; i<4; i++){
1841                         int sx= mb_x*16 + 4 + 8*(i&1);
1842                         int sy= mb_y*16 + 4 + 8*(i>>1);
1843                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1844                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1845                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1846                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1847                       }
1848                     }else if(IS_16X8(pict->mb_type[mb_index])){
1849                       int i;
1850                       for(i=0; i<2; i++){
1851                         int sx=mb_x*16 + 8;
1852                         int sy=mb_y*16 + 4 + 8*i;
1853                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1854                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1855                         int my=(pict->motion_val[direction][xy][1]>>shift);
1856
1857                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1858                             my*=2;
1859
1860                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1861                       }
1862                     }else if(IS_8X16(pict->mb_type[mb_index])){
1863                       int i;
1864                       for(i=0; i<2; i++){
1865                         int sx=mb_x*16 + 4 + 8*i;
1866                         int sy=mb_y*16 + 8;
1867                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1868                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1869                         int my=(pict->motion_val[direction][xy][1]>>shift);
1870
1871                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1872                             my*=2;
1873
1874                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1875                       }
1876                     }else{
1877                       int sx= mb_x*16 + 8;
1878                       int sy= mb_y*16 + 8;
1879                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1880                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1881                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1882                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1883                     }
1884                   }
1885                 }
1886                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1887                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1888                     int y;
1889                     for(y=0; y<8; y++){
1890                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1891                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1892                     }
1893                 }
1894                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1895                     int mb_type= pict->mb_type[mb_index];
1896                     uint64_t u,v;
1897                     int y;
1898 #define COLOR(theta, r)\
1899 u= (int)(128 + r*cos(theta*3.141592/180));\
1900 v= (int)(128 + r*sin(theta*3.141592/180));
1901
1902
1903                     u=v=128;
1904                     if(IS_PCM(mb_type)){
1905                         COLOR(120,48)
1906                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1907                         COLOR(30,48)
1908                     }else if(IS_INTRA4x4(mb_type)){
1909                         COLOR(90,48)
1910                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1911 //                        COLOR(120,48)
1912                     }else if(IS_DIRECT(mb_type)){
1913                         COLOR(150,48)
1914                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1915                         COLOR(170,48)
1916                     }else if(IS_GMC(mb_type)){
1917                         COLOR(190,48)
1918                     }else if(IS_SKIP(mb_type)){
1919 //                        COLOR(180,48)
1920                     }else if(!USES_LIST(mb_type, 1)){
1921                         COLOR(240,48)
1922                     }else if(!USES_LIST(mb_type, 0)){
1923                         COLOR(0,48)
1924                     }else{
1925                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1926                         COLOR(300,48)
1927                     }
1928
1929                     u*= 0x0101010101010101ULL;
1930                     v*= 0x0101010101010101ULL;
1931                     for(y=0; y<8; y++){
1932                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1933                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1934                     }
1935
1936                     //segmentation
1937                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1938                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1939                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1940                     }
1941                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1942                         for(y=0; y<16; y++)
1943                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1944                     }
1945                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
1946                         int dm= 1 << (mv_sample_log2-2);
1947                         for(i=0; i<4; i++){
1948                             int sx= mb_x*16 + 8*(i&1);
1949                             int sy= mb_y*16 + 8*(i>>1);
1950                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1951                             //FIXME bidir
1952                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
1953                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
1954                                 for(y=0; y<8; y++)
1955                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
1956                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
1957                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
1958                         }
1959                     }
1960
1961                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1962                         // hmm
1963                     }
1964                 }
1965                 s->mbskip_table[mb_index]=0;
1966             }
1967         }
1968     }
1969 }
1970
1971 #ifdef CONFIG_ENCODERS
1972
1973 static int get_sae(uint8_t *src, int ref, int stride){
1974     int x,y;
1975     int acc=0;
1976
1977     for(y=0; y<16; y++){
1978         for(x=0; x<16; x++){
1979             acc+= ABS(src[x+y*stride] - ref);
1980         }
1981     }
1982
1983     return acc;
1984 }
1985
1986 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1987     int x, y, w, h;
1988     int acc=0;
1989
1990     w= s->width &~15;
1991     h= s->height&~15;
1992
1993     for(y=0; y<h; y+=16){
1994         for(x=0; x<w; x+=16){
1995             int offset= x + y*stride;
1996             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1997             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1998             int sae = get_sae(src + offset, mean, stride);
1999
2000             acc+= sae + 500 < sad;
2001         }
2002     }
2003     return acc;
2004 }
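/* Heuristic used by b_frame_strategy == 1: for each 16x16 block the SAD
 * against the previous input frame approximates the inter coding cost and the
 * SAE around the block mean approximates the intra coding cost; the return
 * value counts the blocks that look clearly cheaper as intra (sae + 500 < sad),
 * i.e. how much genuinely new content the frame contains. */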
2005
2006
2007 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
2008     AVFrame *pic=NULL;
2009     int64_t pts;
2010     int i;
2011     const int encoding_delay= s->max_b_frames;
2012     int direct=1;
2013
2014     if(pic_arg){
2015         pts= pic_arg->pts;
2016         pic_arg->display_picture_number= s->input_picture_number++;
2017
2018         if(pts != AV_NOPTS_VALUE){
2019             if(s->user_specified_pts != AV_NOPTS_VALUE){
2020                 int64_t time= pts;
2021                 int64_t last= s->user_specified_pts;
2022
2023                 if(time <= last){
2024                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
2025                     return -1;
2026                 }
2027             }
2028             s->user_specified_pts= pts;
2029         }else{
2030             if(s->user_specified_pts != AV_NOPTS_VALUE){
2031                 s->user_specified_pts=
2032                 pts= s->user_specified_pts + 1;
2033                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
2034             }else{
2035                 pts= pic_arg->display_picture_number;
2036             }
2037         }
2038     }
2039
2040   if(pic_arg){
2041     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2042     if(pic_arg->linesize[0] != s->linesize) direct=0;
2043     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2044     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2045
2046 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2047
2048     if(direct){
2049         i= ff_find_unused_picture(s, 1);
2050
2051         pic= (AVFrame*)&s->picture[i];
2052         pic->reference= 3;
2053
2054         for(i=0; i<4; i++){
2055             pic->data[i]= pic_arg->data[i];
2056             pic->linesize[i]= pic_arg->linesize[i];
2057         }
2058         alloc_picture(s, (Picture*)pic, 1);
2059     }else{
2060         int offset= 16;
2061         i= ff_find_unused_picture(s, 0);
2062
2063         pic= (AVFrame*)&s->picture[i];
2064         pic->reference= 3;
2065
2066         alloc_picture(s, (Picture*)pic, 0);
2067
2068         if(   pic->data[0] + offset == pic_arg->data[0]
2069            && pic->data[1] + offset == pic_arg->data[1]
2070            && pic->data[2] + offset == pic_arg->data[2]){
2071        // empty
2072         }else{
2073             int h_chroma_shift, v_chroma_shift;
2074             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2075
2076             for(i=0; i<3; i++){
2077                 int src_stride= pic_arg->linesize[i];
2078                 int dst_stride= i ? s->uvlinesize : s->linesize;
2079                 int h_shift= i ? h_chroma_shift : 0;
2080                 int v_shift= i ? v_chroma_shift : 0;
2081                 int w= s->width >>h_shift;
2082                 int h= s->height>>v_shift;
2083                 uint8_t *src= pic_arg->data[i];
2084                 uint8_t *dst= pic->data[i] + offset;
2085
2086                 if(src_stride==dst_stride)
2087                     memcpy(dst, src, src_stride*h);
2088                 else{
2089                     while(h--){
2090                         memcpy(dst, src, w);
2091                         dst += dst_stride;
2092                         src += src_stride;
2093                     }
2094                 }
2095             }
2096         }
2097     }
2098     copy_picture_attributes(s, pic, pic_arg);
2099     pic->pts= pts; //we set this here to avoid modifying pic_arg
2100   }
2101
2102     /* shift buffer entries */
2103     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2104         s->input_picture[i-1]= s->input_picture[i];
2105
2106     s->input_picture[encoding_delay]= (Picture*)pic;
2107
2108     return 0;
2109 }
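/* Two input paths: if the caller guarantees its buffer stays valid
 * (CODEC_FLAG_INPUT_PRESERVED or no B-frame delay) and the strides match, the
 * user frame is referenced directly as a shared picture; otherwise the data is
 * copied into an internal picture at a 16 byte offset into each plane, which
 * select_input_picture() later compensates for by advancing
 * s->new_picture.data[] by 16. */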
2110
2111 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2112     int x, y, plane;
2113     int score=0;
2114     int64_t score64=0;
2115
2116     for(plane=0; plane<3; plane++){
2117         const int stride= p->linesize[plane];
2118         const int bw= plane ? 1 : 2;
2119         for(y=0; y<s->mb_height*bw; y++){
2120             for(x=0; x<s->mb_width*bw; x++){
2121                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2122                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2123
2124                 switch(s->avctx->frame_skip_exp){
2125                     case 0: score= FFMAX(score, v); break;
2126                     case 1: score+= ABS(v);break;
2127                     case 2: score+= v*v;break;
2128                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2129                     case 4: score64+= v*v*(int64_t)(v*v);break;
2130                 }
2131             }
2132         }
2133     }
2134
2135     if(score) score64= score;
2136
2137     if(score64 < s->avctx->frame_skip_threshold)
2138         return 1;
2139     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2140         return 1;
2141     return 0;
2142 }
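/* frame_skip_exp selects the norm used to compare the candidate frame with the
 * last coded reference on 8x8 blocks: 0 = maximum block difference, 1 = sum of
 * absolute differences, 2 = sum of squares, 3/4 = third/fourth powers that
 * weight isolated large errors more heavily.  The frame is skipped when the
 * score stays below frame_skip_threshold or below frame_skip_factor scaled by
 * the current lambda. */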
2143
2144 static void select_input_picture(MpegEncContext *s){
2145     int i;
2146
2147     for(i=1; i<MAX_PICTURE_COUNT; i++)
2148         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2149     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2150
2151     /* set next picture type & ordering */
2152     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2153         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2154             s->reordered_input_picture[0]= s->input_picture[0];
2155             s->reordered_input_picture[0]->pict_type= I_TYPE;
2156             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2157         }else{
2158             int b_frames;
2159
2160             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2161                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2162                 //FIXME check that the gop check above is +-1 correct
2163 //av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2164
2165                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2166                         for(i=0; i<4; i++)
2167                             s->input_picture[0]->data[i]= NULL;
2168                         s->input_picture[0]->type= 0;
2169                     }else{
2170                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2171                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2172
2173                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2174                     }
2175
2176                     emms_c();
2177                     ff_vbv_update(s, 0);
2178
2179                     goto no_output_pic;
2180                 }
2181             }
2182
2183             if(s->flags&CODEC_FLAG_PASS2){
2184                 for(i=0; i<s->max_b_frames+1; i++){
2185                     int pict_num= s->input_picture[0]->display_picture_number + i;
2186
2187                     if(pict_num >= s->rc_context.num_entries)
2188                         break;
2189                     if(!s->input_picture[i]){
2190                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2191                         break;
2192                     }
2193
2194                     s->input_picture[i]->pict_type=
2195                         s->rc_context.entry[pict_num].new_pict_type;
2196                 }
2197             }
2198
2199             if(s->avctx->b_frame_strategy==0){
2200                 b_frames= s->max_b_frames;
2201                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2202             }else if(s->avctx->b_frame_strategy==1){
2203                 for(i=1; i<s->max_b_frames+1; i++){
2204                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2205                         s->input_picture[i]->b_frame_score=
2206                             get_intra_count(s, s->input_picture[i  ]->data[0],
2207                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2208                     }
2209                 }
2210                 for(i=0; i<s->max_b_frames+1; i++){
2211                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2212                 }
2213
2214                 b_frames= FFMAX(0, i-1);
2215
2216                 /* reset scores */
2217                 for(i=0; i<b_frames+1; i++){
2218                     s->input_picture[i]->b_frame_score=0;
2219                 }
2220             }else{
2221                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2222                 b_frames=0;
2223             }
2224
2225             emms_c();
2226 //static int b_count=0;
2227 //b_count+= b_frames;
2228 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2229
2230             for(i= b_frames - 1; i>=0; i--){
2231                 int type= s->input_picture[i]->pict_type;
2232                 if(type && type != B_TYPE)
2233                     b_frames= i;
2234             }
2235             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2236                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2237             }
2238
2239             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2240               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2241                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2242               }else{
2243                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2244                     b_frames=0;
2245                 s->input_picture[b_frames]->pict_type= I_TYPE;
2246               }
2247             }
2248
2249             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2250                && b_frames
2251                && s->input_picture[b_frames]->pict_type== I_TYPE)
2252                 b_frames--;
2253
2254             s->reordered_input_picture[0]= s->input_picture[b_frames];
2255             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2256                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2257             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2258             for(i=0; i<b_frames; i++){
2259                 s->reordered_input_picture[i+1]= s->input_picture[i];
2260                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2261                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2262             }
2263         }
2264     }
2265 no_output_pic:
2266     if(s->reordered_input_picture[0]){
2267         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2268
2269         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2270
2271         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2272             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
2273
2274             int i= ff_find_unused_picture(s, 0);
2275             Picture *pic= &s->picture[i];
2276
2277             /* mark us unused / free shared pic */
2278             for(i=0; i<4; i++)
2279                 s->reordered_input_picture[0]->data[i]= NULL;
2280             s->reordered_input_picture[0]->type= 0;
2281
2282             pic->reference              = s->reordered_input_picture[0]->reference;
2283
2284             alloc_picture(s, pic, 0);
2285
2286             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2287
2288             s->current_picture_ptr= pic;
2289         }else{
2290             // input is not a shared pix -> reuse buffer for current_pix
2291
2292             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2293                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2294
2295             s->current_picture_ptr= s->reordered_input_picture[0];
2296             for(i=0; i<4; i++){
2297                 s->new_picture.data[i]+=16;
2298             }
2299         }
2300         copy_picture(&s->current_picture, s->current_picture_ptr);
2301
2302         s->picture_number= s->new_picture.display_picture_number;
2303 //printf("dpn:%d\n", s->picture_number);
2304     }else{
2305        memset(&s->new_picture, 0, sizeof(Picture));
2306     }
2307 }
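/* Net effect of select_input_picture(): s->input_picture[] holds frames in
 * display order, s->reordered_input_picture[] in coding order (the future
 * P- or I-frame is moved in front of the B-frames that will reference it),
 * and s->new_picture / s->current_picture_ptr describe the frame that
 * MPV_encode_picture() codes next. */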
2308
2309 int MPV_encode_picture(AVCodecContext *avctx,
2310                        unsigned char *buf, int buf_size, void *data)
2311 {
2312     MpegEncContext *s = avctx->priv_data;
2313     AVFrame *pic_arg = data;
2314     int i, stuffing_count;
2315
2316     if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
2317         av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
2318         return -1;
2319     }
2320
2321     for(i=0; i<avctx->thread_count; i++){
2322         int start_y= s->thread_context[i]->start_mb_y;
2323         int   end_y= s->thread_context[i]->  end_mb_y;
2324         int h= s->mb_height;
2325         uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
2326         uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
2327
2328         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2329     }
2330
2331     s->picture_in_gop_number++;
2332
2333     if(load_input_picture(s, pic_arg) < 0)
2334         return -1;
2335
2336     select_input_picture(s);
2337
2338     /* output? */
2339     if(s->new_picture.data[0]){
2340         s->pict_type= s->new_picture.pict_type;
2341 //emms_c();
2342 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2343         MPV_frame_start(s, avctx);
2344
2345         encode_picture(s, s->picture_number);
2346
2347         avctx->real_pict_num  = s->picture_number;
2348         avctx->header_bits = s->header_bits;
2349         avctx->mv_bits     = s->mv_bits;
2350         avctx->misc_bits   = s->misc_bits;
2351         avctx->i_tex_bits  = s->i_tex_bits;
2352         avctx->p_tex_bits  = s->p_tex_bits;
2353         avctx->i_count     = s->i_count;
2354         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2355         avctx->skip_count  = s->skip_count;
2356
2357         MPV_frame_end(s);
2358
2359         if (s->out_format == FMT_MJPEG)
2360             mjpeg_picture_trailer(s);
2361
2362         if(s->flags&CODEC_FLAG_PASS1)
2363             ff_write_pass1_stats(s);
2364
2365         for(i=0; i<4; i++){
2366             s->current_picture_ptr->error[i]= s->current_picture.error[i];
2367             avctx->error[i] += s->current_picture_ptr->error[i];
2368         }
2369
2370         if(s->flags&CODEC_FLAG_PASS1)
2371             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2372         flush_put_bits(&s->pb);
2373         s->frame_bits  = put_bits_count(&s->pb);
2374
2375         stuffing_count= ff_vbv_update(s, s->frame_bits);
2376         if(stuffing_count){
2377             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2378                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2379                 return -1;
2380             }
2381
2382             switch(s->codec_id){
2383             case CODEC_ID_MPEG1VIDEO:
2384             case CODEC_ID_MPEG2VIDEO:
2385                 while(stuffing_count--){
2386                     put_bits(&s->pb, 8, 0);
2387                 }
2388             break;
2389             case CODEC_ID_MPEG4:
2390                 put_bits(&s->pb, 16, 0);
2391                 put_bits(&s->pb, 16, 0x1C3);
2392                 stuffing_count -= 4;
2393                 while(stuffing_count--){
2394                     put_bits(&s->pb, 8, 0xFF);
2395                 }
2396             break;
2397             default:
2398                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2399             }
2400             flush_put_bits(&s->pb);
2401             s->frame_bits  = put_bits_count(&s->pb);
2402         }
2403
2404         /* update mpeg1/2 vbv_delay for CBR */
2405         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2406            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2407             int vbv_delay;
2408
2409             assert(s->repeat_first_field==0);
2410
2411             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2412             assert(vbv_delay < 0xFFFF);
2413
2414             s->vbv_delay_ptr[0] &= 0xF8;
2415             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2416             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2417             s->vbv_delay_ptr[2] &= 0x07;
2418             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2419         }
2420         s->total_bits += s->frame_bits;
2421         avctx->frame_bits  = s->frame_bits;
2422     }else{
2423         assert((pbBufPtr(&s->pb) == s->pb.buf));
2424         s->frame_bits=0;
2425     }
2426     assert((s->frame_bits&7)==0);
2427
2428     return s->frame_bits/8;
2429 }
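/* Note on the CBR path above: vbv_delay is a 16 bit field that was already
 * written into the MPEG-1/2 picture header, so the encoder patches it in
 * place through s->vbv_delay_ptr: 3 bits into the first byte, 8 into the
 * second and 5 into the third.  The return value is the number of bytes
 * produced by this call (0 when the frame was skipped or is still buffered
 * for B-frame reordering). */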
2430
2431 #endif //CONFIG_ENCODERS
2432
2433 static inline void gmc1_motion(MpegEncContext *s,
2434                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2435                                uint8_t **ref_picture)
2436 {
2437     uint8_t *ptr;
2438     int offset, src_x, src_y, linesize, uvlinesize;
2439     int motion_x, motion_y;
2440     int emu=0;
2441
2442     motion_x= s->sprite_offset[0][0];
2443     motion_y= s->sprite_offset[0][1];
2444     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2445     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2446     motion_x<<=(3-s->sprite_warping_accuracy);
2447     motion_y<<=(3-s->sprite_warping_accuracy);
2448     src_x = clip(src_x, -16, s->width);
2449     if (src_x == s->width)
2450         motion_x =0;
2451     src_y = clip(src_y, -16, s->height);
2452     if (src_y == s->height)
2453         motion_y =0;
2454
2455     linesize = s->linesize;
2456     uvlinesize = s->uvlinesize;
2457
2458     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2459
2460     if(s->flags&CODEC_FLAG_EMU_EDGE){
2461         if(   (unsigned)src_x >= s->h_edge_pos - 17
2462            || (unsigned)src_y >= s->v_edge_pos - 17){
2463             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2464             ptr= s->edge_emu_buffer;
2465         }
2466     }
2467
2468     if((motion_x|motion_y)&7){
2469         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2470         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2471     }else{
2472         int dxy;
2473
2474         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2475         if (s->no_rounding){
2476             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2477         }else{
2478             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2479         }
2480     }
2481
2482     if(s->flags&CODEC_FLAG_GRAY) return;
2483
2484     motion_x= s->sprite_offset[1][0];
2485     motion_y= s->sprite_offset[1][1];
2486     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2487     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2488     motion_x<<=(3-s->sprite_warping_accuracy);
2489     motion_y<<=(3-s->sprite_warping_accuracy);
2490     src_x = clip(src_x, -8, s->width>>1);
2491     if (src_x == s->width>>1)
2492         motion_x =0;
2493     src_y = clip(src_y, -8, s->height>>1);
2494     if (src_y == s->height>>1)
2495         motion_y =0;
2496
2497     offset = (src_y * uvlinesize) + src_x;
2498     ptr = ref_picture[1] + offset;
2499     if(s->flags&CODEC_FLAG_EMU_EDGE){
2500         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2501            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2502             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2503             ptr= s->edge_emu_buffer;
2504             emu=1;
2505         }
2506     }
2507     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2508
2509     ptr = ref_picture[2] + offset;
2510     if(emu){
2511         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2512         ptr= s->edge_emu_buffer;
2513     }
2514     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2515
2516     return;
2517 }
2518
2519 static inline void gmc_motion(MpegEncContext *s,
2520                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2521                                uint8_t **ref_picture)
2522 {
2523     uint8_t *ptr;
2524     int linesize, uvlinesize;
2525     const int a= s->sprite_warping_accuracy;
2526     int ox, oy;
2527
2528     linesize = s->linesize;
2529     uvlinesize = s->uvlinesize;
2530
2531     ptr = ref_picture[0];
2532
2533     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2534     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2535
2536     s->dsp.gmc(dest_y, ptr, linesize, 16,
2537            ox,
2538            oy,
2539            s->sprite_delta[0][0], s->sprite_delta[0][1],
2540            s->sprite_delta[1][0], s->sprite_delta[1][1],
2541            a+1, (1<<(2*a+1)) - s->no_rounding,
2542            s->h_edge_pos, s->v_edge_pos);
2543     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2544            ox + s->sprite_delta[0][0]*8,
2545            oy + s->sprite_delta[1][0]*8,
2546            s->sprite_delta[0][0], s->sprite_delta[0][1],
2547            s->sprite_delta[1][0], s->sprite_delta[1][1],
2548            a+1, (1<<(2*a+1)) - s->no_rounding,
2549            s->h_edge_pos, s->v_edge_pos);
2550
2551     if(s->flags&CODEC_FLAG_GRAY) return;
2552
2553     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2554     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2555
2556     ptr = ref_picture[1];
2557     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2558            ox,
2559            oy,
2560            s->sprite_delta[0][0], s->sprite_delta[0][1],
2561            s->sprite_delta[1][0], s->sprite_delta[1][1],
2562            a+1, (1<<(2*a+1)) - s->no_rounding,
2563            s->h_edge_pos>>1, s->v_edge_pos>>1);
2564
2565     ptr = ref_picture[2];
2566     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2567            ox,
2568            oy,
2569            s->sprite_delta[0][0], s->sprite_delta[0][1],
2570            s->sprite_delta[1][0], s->sprite_delta[1][1],
2571            a+1, (1<<(2*a+1)) - s->no_rounding,
2572            s->h_edge_pos>>1, s->v_edge_pos>>1);
2573 }
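/* gmc1_motion() above handles the special case of a purely translational
 * sprite (a single global motion vector, with the fractional part applied by
 * dsp.gmc1), while gmc_motion() evaluates the full affine warp
 *   x' = sprite_offset[0][0] + sprite_delta[0][0]*x + sprite_delta[0][1]*y
 *   y' = sprite_offset[0][1] + sprite_delta[1][0]*x + sprite_delta[1][1]*y
 * per pixel via dsp.gmc(), with sub-pel precision controlled by
 * s->sprite_warping_accuracy. */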
2574
2575 /**
2576  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2577  * @param buf destination buffer
2578  * @param src source buffer
2579  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2580  * @param block_w width of block
2581  * @param block_h height of block
2582  * @param src_x x coordinate of the top left sample of the block in the source buffer
2583  * @param src_y y coordinate of the top left sample of the block in the source buffer
2584  * @param w width of the source buffer
2585  * @param h height of the source buffer
2586  */
2587 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
2588                                     int src_x, int src_y, int w, int h){
2589     int x, y;
2590     int start_y, start_x, end_y, end_x;
2591
2592     if(src_y>= h){
2593         src+= (h-1-src_y)*linesize;
2594         src_y=h-1;
2595     }else if(src_y<=-block_h){
2596         src+= (1-block_h-src_y)*linesize;
2597         src_y=1-block_h;
2598     }
2599     if(src_x>= w){
2600         src+= (w-1-src_x);
2601         src_x=w-1;
2602     }else if(src_x<=-block_w){
2603         src+= (1-block_w-src_x);
2604         src_x=1-block_w;
2605     }
2606
2607     start_y= FFMAX(0, -src_y);
2608     start_x= FFMAX(0, -src_x);
2609     end_y= FFMIN(block_h, h-src_y);
2610     end_x= FFMIN(block_w, w-src_x);
2611
2612     // copy existing part
2613     for(y=start_y; y<end_y; y++){
2614         for(x=start_x; x<end_x; x++){
2615             buf[x + y*linesize]= src[x + y*linesize];
2616         }
2617     }
2618
2619     //top
2620     for(y=0; y<start_y; y++){
2621         for(x=start_x; x<end_x; x++){
2622             buf[x + y*linesize]= buf[x + start_y*linesize];
2623         }
2624     }
2625
2626     //bottom
2627     for(y=end_y; y<block_h; y++){
2628         for(x=start_x; x<end_x; x++){
2629             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2630         }
2631     }
2632
2633     for(y=0; y<block_h; y++){
2634        //left
2635         for(x=0; x<start_x; x++){
2636             buf[x + y*linesize]= buf[start_x + y*linesize];
2637         }
2638
2639        //right
2640         for(x=end_x; x<block_w; x++){
2641             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2642         }
2643     }
2644 }
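/* Typical use (see gmc1_motion() above): when a block read would cross
 * h_edge_pos/v_edge_pos, the needed samples are first copied into
 * s->edge_emu_buffer with the missing part replicated from the nearest border
 * samples, and the motion compensation then reads from that buffer instead of
 * the reference picture. */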
2645
2646 static inline int hpel_motion(MpegEncContext *s,
2647                                   uint8_t *dest, uint8_t *src,
2648                                   int field_based, int field_select,
2649                                   int src_x, int src_y,
2650                                   int width, int height, int stride,
2651                                   int h_edge_pos, int v_edge_pos,
2652                                   int w, int h, op_pixels_func *pix_op,
2653                                   int motion_x, int motion_y)
2654 {
2655     int dxy;
2656     int emu=0;
2657
2658     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2659     src_x += motion_x >> 1;
2660     src_y += motion_y >> 1;
2661
2662     /* WARNING: do not forget half pels */
2663     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2664     if (src_x == width)
2665         dxy &= ~1;
2666     src_y = clip(src_y, -16, height);
2667     if (src_y == height)
2668         dxy &= ~2;
2669     src += src_y * stride + src_x;
2670
2671     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2672         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2673            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2674             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2675                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2676             src= s->edge_emu_buffer;
2677             emu=1;
2678         }
2679     }
2680     if(field_select)
2681         src += s->linesize;
2682     pix_op[dxy](dest, src, stride, h);
2683     return emu;
2684 }
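/* dxy packs the two half-pel flags into two bits ((y&1)<<1 | (x&1)) and thus
 * selects one of the four pix_op variants: plain copy, horizontal average,
 * vertical average or both; the integer part of the motion vector has already
 * been folded into the source pointer. */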
2685
2686 static inline int hpel_motion_lowres(MpegEncContext *s,
2687                                   uint8_t *dest, uint8_t *src,
2688                                   int field_based, int field_select,
2689                                   int src_x, int src_y,
2690                                   int width, int height, int stride,
2691                                   int h_edge_pos, int v_edge_pos,
2692                                   int w, int h, h264_chroma_mc_func *pix_op,
2693                                   int motion_x, int motion_y)
2694 {
2695     const int lowres= s->avctx->lowres;
2696     const int s_mask= (2<<lowres)-1;
2697     int emu=0;
2698     int sx, sy;
2699
2700     if(s->quarter_sample){
2701         motion_x/=2;
2702         motion_y/=2;
2703     }
2704
2705     sx= motion_x & s_mask;
2706     sy= motion_y & s_mask;
2707     src_x += motion_x >> (lowres+1);
2708     src_y += motion_y >> (lowres+1);
2709
2710     src += src_y * stride + src_x;
2711
2712     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2713        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2714         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2715                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2716         src= s->edge_emu_buffer;
2717         emu=1;
2718     }
2719
2720     sx <<= 2 - lowres;
2721     sy <<= 2 - lowres;
2722     if(field_select)
2723         src += s->linesize;
2724     pix_op[lowres](dest, src, stride, h, sx, sy);
2725     return emu;
2726 }
2727
2728 /* apply one mpeg motion vector to the three components */
2729 static always_inline void mpeg_motion(MpegEncContext *s,
2730                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2731                                int field_based, int bottom_field, int field_select,
2732                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2733                                int motion_x, int motion_y, int h)
2734 {
2735     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2736     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2737
2738 #if 0
2739 if(s->quarter_sample)
2740 {
2741     motion_x>>=1;
2742     motion_y>>=1;
2743 }
2744 #endif
2745
2746     v_edge_pos = s->v_edge_pos >> field_based;
2747     linesize   = s->current_picture.linesize[0] << field_based;
2748     uvlinesize = s->current_picture.linesize[1] << field_based;
2749
2750     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2751     src_x = s->mb_x* 16               + (motion_x >> 1);
2752     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2753
2754     if (s->out_format == FMT_H263) {
2755         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2756             mx = (motion_x>>1)|(motion_x&1);
2757             my = motion_y >>1;
2758             uvdxy = ((my & 1) << 1) | (mx & 1);
2759             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2760             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2761         }else{
2762             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2763             uvsrc_x = src_x>>1;
2764             uvsrc_y = src_y>>1;
2765         }
2766     }else if(s->out_format == FMT_H261){//even chroma MVs are full-pel in H261
2767         mx = motion_x / 4;
2768         my = motion_y / 4;
2769         uvdxy = 0;
2770         uvsrc_x = s->mb_x*8 + mx;
2771         uvsrc_y = s->mb_y*8 + my;
2772     } else {
2773         if(s->chroma_y_shift){
2774             mx = motion_x / 2;
2775             my = motion_y / 2;
2776             uvdxy = ((my & 1) << 1) | (mx & 1);
2777             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2778             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2779         } else {
2780             if(s->chroma_x_shift){
2781             //Chroma422
2782                 mx = motion_x / 2;
2783                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
2784                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
2785                 uvsrc_y = src_y;
2786             } else {
2787             //Chroma444
2788                 uvdxy = dxy;
2789                 uvsrc_x = src_x;
2790                 uvsrc_y = src_y;
2791             }
2792         }
2793     }
2794
2795     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2796     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2797     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2798
2799     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
2800        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
2801             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
2802                s->codec_id == CODEC_ID_MPEG1VIDEO){
2803                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
2804                 return ;
2805             }
2806             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2807                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2808             ptr_y = s->edge_emu_buffer;
2809             if(!(s->flags&CODEC_FLAG_GRAY)){
2810                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2811                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
2812                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2813                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
2814                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2815                 ptr_cb= uvbuf;
2816                 ptr_cr= uvbuf+16;
2817             }
2818     }
2819
2820     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2821         dest_y += s->linesize;
2822         dest_cb+= s->uvlinesize;
2823         dest_cr+= s->uvlinesize;
2824     }
2825
2826     if(field_select){
2827         ptr_y += s->linesize;
2828         ptr_cb+= s->uvlinesize;
2829         ptr_cr+= s->uvlinesize;
2830     }
2831
2832     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
2833
2834     if(!(s->flags&CODEC_FLAG_GRAY)){
2835         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
2836         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
2837     }
2838 #if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
2839     if(s->out_format == FMT_H261){
2840         ff_h261_loop_filter(s);
2841     }
2842 #endif
2843 }
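/* Note (illustrative): dxy packs the two half-pel bits of the luma MV,
 * ((motion_y&1)<<1) | (motion_x&1), and selects among the four entries of a
 * pix_op[] row: 0 = plain copy, 1 = horizontal half-pel, 2 = vertical
 * half-pel, 3 = both.  uvdxy does the same for the chroma MV derived above,
 * so the half-pel averaging itself is done by the dsputil put/avg pixel
 * tables rather than here. */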
2844
2845 /* apply one mpeg motion vector to the three components */
2846 static always_inline void mpeg_motion_lowres(MpegEncContext *s,
2847                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2848                                int field_based, int bottom_field, int field_select,
2849                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
2850                                int motion_x, int motion_y, int h)
2851 {
2852     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2853     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
2854     const int lowres= s->avctx->lowres;
2855     const int block_s= 8>>lowres;
2856     const int s_mask= (2<<lowres)-1;
2857     const int h_edge_pos = s->h_edge_pos >> lowres;
2858     const int v_edge_pos = s->v_edge_pos >> lowres;
2859     linesize   = s->current_picture.linesize[0] << field_based;
2860     uvlinesize = s->current_picture.linesize[1] << field_based;
2861
2862     if(s->quarter_sample){ //FIXME obviously not perfect but qpel won't work in lowres anyway
2863         motion_x/=2;
2864         motion_y/=2;
2865     }
2866
2867     if(field_based){
2868         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
2869     }
2870
2871     sx= motion_x & s_mask;
2872     sy= motion_y & s_mask;
2873     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
2874     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
2875
2876     if (s->out_format == FMT_H263) {
2877         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
2878         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
2879         uvsrc_x = src_x>>1;
2880         uvsrc_y = src_y>>1;
2881     }else if(s->out_format == FMT_H261){//even chroma MVs are full-pel in H261
2882         mx = motion_x / 4;
2883         my = motion_y / 4;
2884         uvsx = (2*mx) & s_mask;
2885         uvsy = (2*my) & s_mask;
2886         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
2887         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
2888     } else {
2889         mx = motion_x / 2;
2890         my = motion_y / 2;
2891         uvsx = mx & s_mask;
2892         uvsy = my & s_mask;
2893         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
2894         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
2895     }
2896
2897     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2898     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2899     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2900
2901     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
2902        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2903             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2904                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2905             ptr_y = s->edge_emu_buffer;
2906             if(!(s->flags&CODEC_FLAG_GRAY)){
2907                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2908                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
2909                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
2910                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
2911                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
2912                 ptr_cb= uvbuf;
2913                 ptr_cr= uvbuf+16;
2914             }
2915     }
2916
2917     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2918         dest_y += s->linesize;
2919         dest_cb+= s->uvlinesize;
2920         dest_cr+= s->uvlinesize;
2921     }
2922
2923     if(field_select){
2924         ptr_y += s->linesize;
2925         ptr_cb+= s->uvlinesize;
2926         ptr_cr+= s->uvlinesize;
2927     }
2928
2929     sx <<= 2 - lowres;
2930     sy <<= 2 - lowres;
2931     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
2932
2933     if(!(s->flags&CODEC_FLAG_GRAY)){
2934         uvsx <<= 2 - lowres;
2935         uvsy <<= 2 - lowres;
2936         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
2937         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
2938     }
2939     //FIXME h261 lowres loop filter
2940 }
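/* Note (illustrative): pix_op here is one of the h264_chroma_mc tables, which
 * are ordered by decreasing block width (8, 4, 2 pixels).  The luma block is
 * 16>>lowres pixels wide and chroma 8>>lowres, hence luma uses pix_op[lowres-1]
 * while chroma uses pix_op[lowres]; this also implies the lowres path is only
 * taken for lowres >= 1. */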
2941
2942 //FIXME move to dsputil, avg variant, 16x16 version
2943 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
2944     int x;
2945     uint8_t * const top   = src[1];
2946     uint8_t * const left  = src[2];
2947     uint8_t * const mid   = src[0];
2948     uint8_t * const right = src[3];
2949     uint8_t * const bottom= src[4];
2950 #define OBMC_FILTER(x, t, l, m, r, b)\
2951     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
2952 #define OBMC_FILTER4(x, t, l, m, r, b)\
2953     OBMC_FILTER(x         , t, l, m, r, b);\
2954     OBMC_FILTER(x+1       , t, l, m, r, b);\
2955     OBMC_FILTER(x  +stride, t, l, m, r, b);\
2956     OBMC_FILTER(x+1+stride, t, l, m, r, b);
2957
2958     x=0;
2959     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
2960     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
2961     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
2962     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
2963     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
2964     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
2965     x+= stride;
2966     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
2967     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
2968     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
2969     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
2970     x+= stride;
2971     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
2972     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
2973     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
2974     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
2975     x+= 2*stride;
2976     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
2977     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
2978     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
2979     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
2980     x+= 2*stride;
2981     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
2982     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
2983     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
2984     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
2985     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
2986     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
2987     x+= stride;
2988     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
2989     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
2990     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
2991     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
2992 }
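/* Note (illustrative): in every OBMC_FILTER call above the five weights sum to
 * 8, so each output pixel is a weighted average of the co-located pixels of the
 * overlapping predictions, rounded by the +4 before the >>3.  For the top-left
 * pixel, for instance, dst[0] = (2*top[0] + 2*left[0] + 4*mid[0] + 4) >> 3. */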
2993
2994 /* obmc for 1 8x8 luma block */
2995 static inline void obmc_motion(MpegEncContext *s,
2996                                uint8_t *dest, uint8_t *src,
2997                                int src_x, int src_y,
2998                                op_pixels_func *pix_op,
2999                                int16_t mv[5][2]/* mid top left right bottom*/)
3000 #define MID    0
3001 {
3002     int i;
3003     uint8_t *ptr[5];
3004
3005     assert(s->quarter_sample==0);
3006
3007     for(i=0; i<5; i++){
3008         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3009             ptr[i]= ptr[MID];
3010         }else{
3011             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3012             hpel_motion(s, ptr[i], src, 0, 0,
3013                         src_x, src_y,
3014                         s->width, s->height, s->linesize,
3015                         s->h_edge_pos, s->v_edge_pos,
3016                         8, 8, pix_op,
3017                         mv[i][0], mv[i][1]);
3018         }
3019     }
3020
3021     put_obmc(dest, ptr, s->linesize);
3022 }
3023
3024 static inline void qpel_motion(MpegEncContext *s,
3025                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3026                                int field_based, int bottom_field, int field_select,
3027                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3028                                qpel_mc_func (*qpix_op)[16],
3029                                int motion_x, int motion_y, int h)
3030 {
3031     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3032     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3033
3034     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3035     src_x = s->mb_x *  16                 + (motion_x >> 2);
3036     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3037
3038     v_edge_pos = s->v_edge_pos >> field_based;
3039     linesize = s->linesize << field_based;
3040     uvlinesize = s->uvlinesize << field_based;
3041
3042     if(field_based){
3043         mx= motion_x/2;
3044         my= motion_y>>1;
3045     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3046         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3047         mx= (motion_x>>1) + rtab[motion_x&7];
3048         my= (motion_y>>1) + rtab[motion_y&7];
3049     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3050         mx= (motion_x>>1)|(motion_x&1);
3051         my= (motion_y>>1)|(motion_y&1);
3052     }else{
3053         mx= motion_x/2;
3054         my= motion_y/2;
3055     }
3056     mx= (mx>>1)|(mx&1);
3057     my= (my>>1)|(my&1);
3058
3059     uvdxy= (mx&1) | ((my&1)<<1);
3060     mx>>=1;
3061     my>>=1;
3062
3063     uvsrc_x = s->mb_x *  8                 + mx;
3064     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3065
3066     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3067     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3068     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3069
3070     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
3071        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3072         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3073                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3074         ptr_y= s->edge_emu_buffer;
3075         if(!(s->flags&CODEC_FLAG_GRAY)){
3076             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3077             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
3078                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3079             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
3080                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3081             ptr_cb= uvbuf;
3082             ptr_cr= uvbuf + 16;
3083         }
3084     }
3085
3086     if(!field_based)
3087         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3088     else{
3089         if(bottom_field){
3090             dest_y += s->linesize;
3091             dest_cb+= s->uvlinesize;
3092             dest_cr+= s->uvlinesize;
3093         }
3094
3095         if(field_select){
3096             ptr_y  += s->linesize;
3097             ptr_cb += s->uvlinesize;
3098             ptr_cr += s->uvlinesize;
3099         }
3100         //damn interlaced mode
3101         //FIXME boundary mirroring is not exactly correct here
3102         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3103         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3104     }
3105     if(!(s->flags&CODEC_FLAG_GRAY)){
3106         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3107         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3108     }
3109 }
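/* Note (illustrative): dxy packs the two quarter-pel fractions of the luma MV
 * and selects one of the 16 qpel functions; for chroma the quarter-pel MV is
 * reduced to half-pel resolution by the two halvings above (the exact rounding
 * depends on the FF_BUG_QPEL_CHROMA* workarounds), so uvdxy ends up in 0..3 and
 * the ordinary half-pel pix_op table is used for the chroma planes. */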
3110
3111 inline int ff_h263_round_chroma(int x){
3112     if (x >= 0)
3113         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3114     else {
3115         x = -x;
3116         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3117     }
3118 }
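/* Note (illustrative): in the 4MV case the caller passes the *sum* of the four
 * 8x8 luma MV components (in half-pel units), so the result is, up to the
 * rounding prescribed by h263_chroma_roundtab, roughly x/8: the average luma
 * vector halved once more for the half-resolution chroma planes, again
 * expressed in half-pel units. */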
3119
3120 /**
3121  * h263 chroma 4MV motion compensation.
3122  */
3123 static inline void chroma_4mv_motion(MpegEncContext *s,
3124                                      uint8_t *dest_cb, uint8_t *dest_cr,
3125                                      uint8_t **ref_picture,
3126                                      op_pixels_func *pix_op,
3127                                      int mx, int my){
3128     int dxy, emu=0, src_x, src_y, offset;
3129     uint8_t *ptr;
3130
3131     /* In case of 8X8, we construct a single chroma motion vector
3132        with a special rounding */
3133     mx= ff_h263_round_chroma(mx);
3134     my= ff_h263_round_chroma(my);
3135
3136     dxy = ((my & 1) << 1) | (mx & 1);
3137     mx >>= 1;
3138     my >>= 1;
3139
3140     src_x = s->mb_x * 8 + mx;
3141     src_y = s->mb_y * 8 + my;
3142     src_x = clip(src_x, -8, s->width/2);
3143     if (src_x == s->width/2)
3144         dxy &= ~1;
3145     src_y = clip(src_y, -8, s->height/2);
3146     if (src_y == s->height/2)
3147         dxy &= ~2;
3148
3149     offset = (src_y * (s->uvlinesize)) + src_x;
3150     ptr = ref_picture[1] + offset;
3151     if(s->flags&CODEC_FLAG_EMU_EDGE){
3152         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3153            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3154             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3155             ptr= s->edge_emu_buffer;
3156             emu=1;
3157         }
3158     }
3159     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3160
3161     ptr = ref_picture[2] + offset;
3162     if(emu){
3163         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3164         ptr= s->edge_emu_buffer;
3165     }
3166     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3167 }
3168
3169 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3170                                      uint8_t *dest_cb, uint8_t *dest_cr,
3171                                      uint8_t **ref_picture,
3172                                      h264_chroma_mc_func *pix_op,
3173                                      int mx, int my){
3174     const int lowres= s->avctx->lowres;
3175     const int block_s= 8>>lowres;
3176     const int s_mask= (2<<lowres)-1;
3177     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3178     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3179     int emu=0, src_x, src_y, offset, sx, sy;
3180     uint8_t *ptr;
3181
3182     if(s->quarter_sample){
3183         mx/=2;
3184         my/=2;
3185     }
3186
3187     /* In case of 8X8, we construct a single chroma motion vector
3188        with a special rounding */
3189     mx= ff_h263_round_chroma(mx);
3190     my= ff_h263_round_chroma(my);
3191
3192     sx= mx & s_mask;
3193     sy= my & s_mask;
3194     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3195     src_y = s->mb_y*block_s + (my >> (lowres+1));
3196
3197     offset = src_y * s->uvlinesize + src_x;
3198     ptr = ref_picture[1] + offset;
3199     if(s->flags&CODEC_FLAG_EMU_EDGE){
3200         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3201            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3202             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3203             ptr= s->edge_emu_buffer;
3204             emu=1;
3205         }
3206     }
3207     sx <<= 2 - lowres;
3208     sy <<= 2 - lowres;
3209     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3210
3211     ptr = ref_picture[2] + offset;
3212     if(emu){
3213         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3214         ptr= s->edge_emu_buffer;
3215     }
3216     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3217 }
3218
3219 /**
3220  * motion compensation of a single macroblock
3221  * @param s context
3222  * @param dest_y luma destination pointer
3223  * @param dest_cb chroma cb/u destination pointer
3224  * @param dest_cr chroma cr/v destination pointer
3225  * @param dir direction (0->forward, 1->backward)
3226  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3227  * @param pix_op halfpel motion compensation function (average or put normally)
3228  * @param qpix_op qpel motion compensation function (average or put normally)
3229  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3230  */
3231 static inline void MPV_motion(MpegEncContext *s,
3232                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3233                               int dir, uint8_t **ref_picture,
3234                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3235 {
3236     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3237     int mb_x, mb_y, i;
3238     uint8_t *ptr, *dest;
3239
3240     mb_x = s->mb_x;
3241     mb_y = s->mb_y;
3242
3243     if(s->obmc && s->pict_type != B_TYPE){
3244         int16_t mv_cache[4][4][2];
3245         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3246         const int mot_stride= s->b8_stride;
3247         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3248
3249         assert(!s->mb_skipped);
3250
3251         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3252         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3253         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3254
3255         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3256             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3257         }else{
3258             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3259         }
3260
3261         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3262             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3263             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3264         }else{
3265             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3266             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3267         }
3268
3269         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3270             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3271             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3272         }else{
3273             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3274             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3275         }
3276
3277         mx = 0;
3278         my = 0;
3279         for(i=0;i<4;i++) {
3280             const int x= (i&1)+1;
3281             const int y= (i>>1)+1;
3282             int16_t mv[5][2]= {
3283                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3284                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3285                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3286                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3287                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3288             //FIXME cleanup
3289             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3290                         ref_picture[0],
3291                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3292                         pix_op[1],
3293                         mv);
3294
3295             mx += mv[0][0];
3296             my += mv[0][1];
3297         }
3298         if(!(s->flags&CODEC_FLAG_GRAY))
3299             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3300
3301         return;
3302     }
3303
3304     switch(s->mv_type) {
3305     case MV_TYPE_16X16:
3306         if(s->mcsel){
3307             if(s->real_sprite_warping_points==1){
3308                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3309                             ref_picture);
3310             }else{
3311                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3312                             ref_picture);
3313             }
3314         }else if(s->quarter_sample){
3315             qpel_motion(s, dest_y, dest_cb, dest_cr,
3316                         0, 0, 0,
3317                         ref_picture, pix_op, qpix_op,
3318                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3319         }else if(s->mspel){
3320             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3321                         ref_picture, pix_op,
3322                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3323         }else
3324         {
3325             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3326                         0, 0, 0,
3327                         ref_picture, pix_op,
3328                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3329         }
3330         break;
3331     case MV_TYPE_8X8:
3332         mx = 0;
3333         my = 0;
3334         if(s->quarter_sample){
3335             for(i=0;i<4;i++) {
3336                 motion_x = s->mv[dir][i][0];
3337                 motion_y = s->mv[dir][i][1];
3338
3339                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3340                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3341                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3342
3343                 /* WARNING: do not forget half pels */
3344                 src_x = clip(src_x, -16, s->width);
3345                 if (src_x == s->width)
3346                     dxy &= ~3;
3347                 src_y = clip(src_y, -16, s->height);
3348                 if (src_y == s->height)
3349                     dxy &= ~12;
3350
3351                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3352                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3353                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3354                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3355                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3356                         ptr= s->edge_emu_buffer;
3357                     }
3358                 }
3359                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3360                 qpix_op[1][dxy](dest, ptr, s->linesize);
3361
3362                 mx += s->mv[dir][i][0]/2;
3363                 my += s->mv[dir][i][1]/2;
3364             }
3365         }else{
3366             for(i=0;i<4;i++) {
3367                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3368                             ref_picture[0], 0, 0,
3369                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3370                             s->width, s->height, s->linesize,
3371                             s->h_edge_pos, s->v_edge_pos,
3372                             8, 8, pix_op[1],
3373                             s->mv[dir][i][0], s->mv[dir][i][1]);
3374
3375                 mx += s->mv[dir][i][0];
3376                 my += s->mv[dir][i][1];
3377             }
3378         }
3379
3380         if(!(s->flags&CODEC_FLAG_GRAY))
3381             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3382         break;
3383     case MV_TYPE_FIELD:
3384         if (s->picture_structure == PICT_FRAME) {
3385             if(s->quarter_sample){
3386                 for(i=0; i<2; i++){
3387                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3388                                 1, i, s->field_select[dir][i],
3389                                 ref_picture, pix_op, qpix_op,
3390                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3391                 }
3392             }else{
3393                 /* top field */
3394                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3395                             1, 0, s->field_select[dir][0],
3396                             ref_picture, pix_op,
3397                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3398                 /* bottom field */
3399                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3400                             1, 1, s->field_select[dir][1],
3401                             ref_picture, pix_op,
3402                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3403             }
3404         } else {
3405             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3406                 ref_picture= s->current_picture_ptr->data;
3407             }
3408
3409             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3410                         0, 0, s->field_select[dir][0],
3411                         ref_picture, pix_op,
3412                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3413         }
3414         break;
3415     case MV_TYPE_16X8:
3416         for(i=0; i<2; i++){
3417             uint8_t ** ref2picture;
3418
3419             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3420                 ref2picture= ref_picture;
3421             }else{
3422                 ref2picture= s->current_picture_ptr->data;
3423             }
3424
3425             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3426                         0, 0, s->field_select[dir][i],
3427                         ref2picture, pix_op,
3428                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3429
3430             dest_y += 16*s->linesize;
3431             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3432             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3433         }
3434         break;
3435     case MV_TYPE_DMV:
3436         if(s->picture_structure == PICT_FRAME){
3437             for(i=0; i<2; i++){
3438                 int j;
3439                 for(j=0; j<2; j++){
3440                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3441                                 1, j, j^i,
3442                                 ref_picture, pix_op,
3443                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3444                 }
3445                 pix_op = s->dsp.avg_pixels_tab;
3446             }
3447         }else{
3448             for(i=0; i<2; i++){
3449                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3450                             0, 0, s->picture_structure != i+1,
3451                             ref_picture, pix_op,
3452                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3453
3454                 // after put we make avg of the same block
3455                 pix_op=s->dsp.avg_pixels_tab;
3456
3457                 //opposite parity is always in the same frame if this is the second field
3458                 if(!s->first_field){
3459                     ref_picture = s->current_picture_ptr->data;
3460                 }
3461             }
3462         }
3463     break;
3464     default: assert(0);
3465     }
3466 }
3467
3468 /**
3469  * motion compensation of a single macroblock
3470  * @param s context
3471  * @param dest_y luma destination pointer
3472  * @param dest_cb chroma cb/u destination pointer
3473  * @param dest_cr chroma cr/v destination pointer
3474  * @param dir direction (0->forward, 1->backward)
3475  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3476  * @param pix_op halfpel motion compensation function (average or put normally)
3477  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3478  */
3479 static inline void MPV_motion_lowres(MpegEncContext *s,
3480                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3481                               int dir, uint8_t **ref_picture,
3482                               h264_chroma_mc_func *pix_op)
3483 {
3484     int mx, my;
3485     int mb_x, mb_y, i;
3486     const int lowres= s->avctx->lowres;
3487     const int block_s= 8>>lowres;
3488
3489     mb_x = s->mb_x;
3490     mb_y = s->mb_y;
3491
3492     switch(s->mv_type) {
3493     case MV_TYPE_16X16:
3494         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3495                     0, 0, 0,
3496                     ref_picture, pix_op,
3497                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3498         break;
3499     case MV_TYPE_8X8:
3500         mx = 0;
3501         my = 0;
3502             for(i=0;i<4;i++) {
3503                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3504                             ref_picture[0], 0, 0,
3505                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3506                             s->width, s->height, s->linesize,
3507                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3508                             block_s, block_s, pix_op,
3509                             s->mv[dir][i][0], s->mv[dir][i][1]);
3510
3511                 mx += s->mv[dir][i][0];
3512                 my += s->mv[dir][i][1];
3513             }
3514
3515         if(!(s->flags&CODEC_FLAG_GRAY))
3516             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3517         break;
3518     case MV_TYPE_FIELD:
3519         if (s->picture_structure == PICT_FRAME) {
3520             /* top field */
3521             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3522                         1, 0, s->field_select[dir][0],
3523                         ref_picture, pix_op,
3524                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3525             /* bottom field */
3526             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3527                         1, 1, s->field_select[dir][1],
3528                         ref_picture, pix_op,
3529                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3530         } else {
3531             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3532                 ref_picture= s->current_picture_ptr->data;
3533             }
3534
3535             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3536                         0, 0, s->field_select[dir][0],
3537                         ref_picture, pix_op,
3538                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3539         }
3540         break;
3541     case MV_TYPE_16X8:
3542         for(i=0; i<2; i++){
3543             uint8_t ** ref2picture;
3544
3545             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3546                 ref2picture= ref_picture;
3547             }else{
3548                 ref2picture= s->current_picture_ptr->data;
3549             }
3550
3551             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3552                         0, 0, s->field_select[dir][i],
3553                         ref2picture, pix_op,
3554                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3555
3556             dest_y += 2*block_s*s->linesize;
3557             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3558             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3559         }
3560         break;
3561     case MV_TYPE_DMV:
3562         if(s->picture_structure == PICT_FRAME){
3563             for(i=0; i<2; i++){
3564                 int j;
3565                 for(j=0; j<2; j++){
3566                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3567                                 1, j, j^i,
3568                                 ref_picture, pix_op,
3569                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3570                 }
3571                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3572             }
3573         }else{
3574             for(i=0; i<2; i++){
3575                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3576                             0, 0, s->picture_structure != i+1,
3577                             ref_picture, pix_op,
3578                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3579
3580                 // after put we make avg of the same block
3581                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3582
3583                 //opposite parity is always in the same frame if this is the second field
3584                 if(!s->first_field){
3585                     ref_picture = s->current_picture_ptr->data;
3586                 }
3587             }
3588         }
3589     break;
3590     default: assert(0);
3591     }
3592 }
3593
3594 /* put block[] to dest[] */
3595 static inline void put_dct(MpegEncContext *s,
3596                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3597 {
3598     s->dct_unquantize_intra(s, block, i, qscale);
3599     s->dsp.idct_put (dest, line_size, block);
3600 }
3601
3602 /* add block[] to dest[] */
3603 static inline void add_dct(MpegEncContext *s,
3604                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3605 {
3606     if (s->block_last_index[i] >= 0) {
3607         s->dsp.idct_add (dest, line_size, block);
3608     }
3609 }
3610
3611 static inline void add_dequant_dct(MpegEncContext *s,
3612                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3613 {
3614     if (s->block_last_index[i] >= 0) {
3615         s->dct_unquantize_inter(s, block, i, qscale);
3616
3617         s->dsp.idct_add (dest, line_size, block);
3618     }
3619 }
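/* Note (illustrative): put_dct() is used for intra blocks, where the IDCT
 * output simply overwrites the destination, while add_dct()/add_dequant_dct()
 * accumulate the residual on top of the motion compensated prediction of inter
 * blocks; the _dequant variant additionally runs the inter dequantizer for
 * codecs whose coefficients are still stored quantized at this point (see
 * MPV_decode_mb_internal() below). */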
3620
3621 /**
3622  * cleans dc, ac and coded_block for the current non-intra MB
3623  */
3624 void ff_clean_intra_table_entries(MpegEncContext *s)
3625 {
3626     int wrap = s->b8_stride;
3627     int xy = s->block_index[0];
3628
3629     s->dc_val[0][xy           ] =
3630     s->dc_val[0][xy + 1       ] =
3631     s->dc_val[0][xy     + wrap] =
3632     s->dc_val[0][xy + 1 + wrap] = 1024;
3633     /* ac pred */
3634     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3635     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3636     if (s->msmpeg4_version>=3) {
3637         s->coded_block[xy           ] =
3638         s->coded_block[xy + 1       ] =
3639         s->coded_block[xy     + wrap] =
3640         s->coded_block[xy + 1 + wrap] = 0;
3641     }
3642     /* chroma */
3643     wrap = s->mb_stride;
3644     xy = s->mb_x + s->mb_y * wrap;
3645     s->dc_val[1][xy] =
3646     s->dc_val[2][xy] = 1024;
3647     /* ac pred */
3648     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3649     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3650
3651     s->mbintra_table[xy]= 0;
3652 }
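/* Note (illustrative): 1024 is the conventional DC predictor reset value,
 * i.e. mid-grey (128) times the internal DC scale of 8, matching what the DCT
 * of a flat 128-valued block would produce. */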
3653
3654 /* generic function called after a macroblock has been parsed by the
3655    decoder or after it has been encoded by the encoder.
3656
3657    Important variables used:
3658    s->mb_intra : true if intra macroblock
3659    s->mv_dir   : motion vector direction
3660    s->mv_type  : motion vector type
3661    s->mv       : motion vector
3662    s->interlaced_dct : true if interlaced dct used (mpeg2)
3663  */
3664 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3665 {
3666     int mb_x, mb_y;
3667     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3668 #ifdef HAVE_XVMC
3669     if(s->avctx->xvmc_acceleration){
3670         XVMC_decode_mb(s);//xvmc uses pblocks
3671         return;
3672     }
3673 #endif
3674
3675     mb_x = s->mb_x;
3676     mb_y = s->mb_y;
3677
3678     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3679        /* save DCT coefficients */
3680        int i,j;
3681        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3682        for(i=0; i<6; i++)
3683            for(j=0; j<64; j++)
3684                *dct++ = block[i][s->dsp.idct_permutation[j]];
3685     }
3686
3687     s->current_picture.qscale_table[mb_xy]= s->qscale;
3688
3689     /* update DC predictors for P macroblocks */
3690     if (!s->mb_intra) {
3691         if (s->h263_pred || s->h263_aic) {
3692             if(s->mbintra_table[mb_xy])
3693                 ff_clean_intra_table_entries(s);
3694         } else {
3695             s->last_dc[0] =
3696             s->last_dc[1] =
3697             s->last_dc[2] = 128 << s->intra_dc_precision;
3698         }
3699     }
3700     else if (s->h263_pred || s->h263_aic)
3701         s->mbintra_table[mb_xy]=1;
3702
3703     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3704         uint8_t *dest_y, *dest_cb, *dest_cr;
3705         int dct_linesize, dct_offset;
3706         op_pixels_func (*op_pix)[4];
3707         qpel_mc_func (*op_qpix)[16];
3708         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3709         const int uvlinesize= s->current_picture.linesize[1];
3710         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3711         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3712
3713         /* avoid copy if macroblock skipped in last frame too */
3714         /* skip only during decoding as we might trash the buffers during encoding a bit */
3715         if(!s->encoding){
3716             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3717             const int age= s->current_picture.age;
3718
3719             assert(age);
3720
3721             if (s->mb_skipped) {
3722                 s->mb_skipped= 0;
3723                 assert(s->pict_type!=I_TYPE);
3724
3725                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3726                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3727
3728                 /* if previous was skipped too, then nothing to do !  */
3729                 if (*mbskip_ptr >= age && s->current_picture.reference){
3730                     return;
3731                 }
3732             } else if(!s->current_picture.reference){
3733                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3734                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3735             } else{
3736                 *mbskip_ptr = 0; /* not skipped */
3737             }
3738         }
3739
3740         dct_linesize = linesize << s->interlaced_dct;
3741         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3742
3743         if(readable){
3744             dest_y=  s->dest[0];
3745             dest_cb= s->dest[1];
3746             dest_cr= s->dest[2];
3747         }else{
3748             dest_y = s->b_scratchpad;
3749             dest_cb= s->b_scratchpad+16*linesize;
3750             dest_cr= s->b_scratchpad+32*linesize;
3751         }
3752
3753         if (!s->mb_intra) {
3754             /* motion handling */
3755             /* decoding or more than one mb_type (MC was already done otherwise) */
3756             if(!s->encoding){
3757                 if(lowres_flag){
3758                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3759
3760                     if (s->mv_dir & MV_DIR_FORWARD) {
3761                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3762                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3763                     }
3764                     if (s->mv_dir & MV_DIR_BACKWARD) {
3765                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3766                     }
3767                 }else{
3768                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
3769                         op_pix = s->dsp.put_pixels_tab;
3770                         op_qpix= s->dsp.put_qpel_pixels_tab;
3771                     }else{
3772                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3773                         op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3774                     }
3775                     if (s->mv_dir & MV_DIR_FORWARD) {
3776                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3777                         op_pix = s->dsp.avg_pixels_tab;
3778                         op_qpix= s->dsp.avg_qpel_pixels_tab;
3779                     }
3780                     if (s->mv_dir & MV_DIR_BACKWARD) {
3781                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3782                     }
3783                 }
3784             }
3785
3786             /* skip dequant / idct if we are really late ;) */
3787             if(s->hurry_up>1) goto skip_idct;
3788             if(s->avctx->skip_idct){
3789                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
3790                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
3791                    || s->avctx->skip_idct >= AVDISCARD_ALL)
3792                     goto skip_idct;
3793             }
3794
3795             /* add dct residue */
3796             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3797                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3798                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3799                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3800                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3801                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3802
3803                 if(!(s->flags&CODEC_FLAG_GRAY)){
3804                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3805                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3806                 }
3807             } else if(s->codec_id != CODEC_ID_WMV2){
3808                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
3809                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
3810                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
3811                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
3812
3813                 if(!(s->flags&CODEC_FLAG_GRAY)){
3814                     if(s->chroma_y_shift){//Chroma420
3815                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3816                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
3817                     }else{
3818                         //chroma422
3819                         dct_linesize = uvlinesize << s->interlaced_dct;
3820                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3821
3822                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
3823                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
3824                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
3825                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
3826                         if(!s->chroma_x_shift){//Chroma444
3827                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
3828                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
3829                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
3830                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
3831                         }
3832                     }
3833                 }//fi gray
3834             }
3835             else{
3836                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
3837             }
3838         } else {
3839             /* dct only in intra block */
3840             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
3841                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3842                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3843                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3844                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3845
3846                 if(!(s->flags&CODEC_FLAG_GRAY)){
3847                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3848                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3849                 }
3850             }else{
3851                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
3852                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
3853                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
3854                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
3855
3856                 if(!(s->flags&CODEC_FLAG_GRAY)){
3857                     if(s->chroma_y_shift){
3858                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
3859                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
3860                     }else{
3861
3862                         dct_linesize = uvlinesize << s->interlaced_dct;
3863                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3864
3865                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
3866                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
3867                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
3868                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
3869                         if(!s->chroma_x_shift){//Chroma444
3870                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
3871                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
3872                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
3873                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
3874                         }
3875                     }
3876                 }//gray
3877             }
3878         }
3879 skip_idct:
3880         if(!readable){
3881             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
3882             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
3883             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
3884         }
3885     }
3886 }
3887
3888 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
3889     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
3890     else                  MPV_decode_mb_internal(s, block, 0);
3891 }
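/* Note (illustrative): since MPV_decode_mb_internal() is always_inline and
 * lowres_flag is a compile time constant at each of the two calls above, the
 * compiler emits two specialized bodies and the lowres checks inside are
 * resolved at compile time. */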
3892
3893 #ifdef CONFIG_ENCODERS
3894
3895 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
3896 {
3897     static const char tab[64]=
3898         {3,2,2,1,1,1,1,1,
3899          1,1,1,1,1,1,1,1,
3900          1,1,1,1,1,1,1,1,
3901          0,0,0,0,0,0,0,0,
3902          0,0,0,0,0,0,0,0,
3903          0,0,0,0,0,0,0,0,
3904          0,0,0,0,0,0,0,0,
3905          0,0,0,0,0,0,0,0};
3906     int score=0;
3907     int run=0;
3908     int i;
3909     DCTELEM *block= s->block[n];
3910     const int last_index= s->block_last_index[n];
3911     int skip_dc;
3912
3913     if(threshold<0){
3914         skip_dc=0;
3915         threshold= -threshold;
3916     }else
3917         skip_dc=1;
3918
3919     /* are all the coefficients which we could set to zero already zero? */
3920     if(last_index<=skip_dc - 1) return;
3921
3922     for(i=0; i<=last_index; i++){
3923         const int j = s->intra_scantable.permutated[i];
3924         const int level = ABS(block[j]);
3925         if(level==1){
3926             if(skip_dc && i==0) continue;
3927             score+= tab[run];
3928             run=0;
3929         }else if(level>1){
3930             return;
3931         }else{
3932             run++;
3933         }
3934     }
3935     if(score >= threshold) return;
3936     for(i=skip_dc; i<=last_index; i++){
3937         const int j = s->intra_scantable.permutated[i];
3938         block[j]=0;
3939     }
3940     if(block[0]) s->block_last_index[n]= 0;
3941     else         s->block_last_index[n]= -1;
3942 }
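/* Note (illustrative): only blocks whose nonzero coefficients are all +-1 can
 * be eliminated (any larger level returns immediately).  For example a block
 * with a single +-1 preceded by 24 or more zeros in scan order scores
 * tab[run] == 0, which is below any positive threshold, so everything but the
 * (optionally preserved) DC coefficient is cleared and block_last_index[n]
 * drops to 0 or -1. */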
3943
3944 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
3945 {
3946     int i;
3947     const int maxlevel= s->max_qcoeff;
3948     const int minlevel= s->min_qcoeff;
3949     int overflow=0;
3950
3951     if(s->mb_intra){
3952         i=1; //skip clipping of intra dc
3953     }else
3954         i=0;
3955
3956     for(;i<=last_index; i++){
3957         const int j= s->intra_scantable.permutated[i];
3958         int level = block[j];
3959
3960         if     (level>maxlevel){
3961             level=maxlevel;
3962             overflow++;
3963         }else if(level<minlevel){
3964             level=minlevel;
3965             overflow++;
3966         }
3967
3968         block[j]= level;
3969     }
3970
3971     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
3972         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
3973 }
3974
3975 #endif //CONFIG_ENCODERS
3976
3977 /**
3978  * Calls the user-supplied draw_horiz_band callback for the given slice, if one is set.
3979  * @param h is the normal height; it will be reduced automatically if needed for the last row
3980  */
3981 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
3982     if (s->avctx->draw_horiz_band) {
3983         AVFrame *src;
3984         int offset[4];
3985
3986         if(s->picture_structure != PICT_FRAME){
3987             h <<= 1;
3988             y <<= 1;
3989             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
3990         }
3991
3992         h= FFMIN(h, s->avctx->height - y);
3993
3994         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
3995             src= (AVFrame*)s->current_picture_ptr;
3996         else if(s->last_picture_ptr)
3997             src= (AVFrame*)s->last_picture_ptr;
3998         else
3999             return;
4000
4001         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4002             offset[0]=
4003             offset[1]=
4004             offset[2]=
4005             offset[3]= 0;
4006         }else{
4007             offset[0]= y * s->linesize;
4008             offset[1]=
4009             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4010             offset[3]= 0;
4011         }
4012
4013         emms_c();
4014
4015         s->avctx->draw_horiz_band(s->avctx, src, offset,
4016                                   y, s->picture_structure, h);
4017     }
4018 }
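/* Illustrative sketch, not part of the original file: a user supplied
 * draw_horiz_band callback consistent with the call above.  The prototype is
 * assumed to match the avcodec.h declaration of this era, and
 * user_buffer/user_stride are hypothetical placeholders. */
#if 0
static void example_draw_horiz_band(struct AVCodecContext *c, const AVFrame *src,
                                    int offset[4], int y, int type, int height)
{
    /* the band covers 'height' luma rows starting at line y; the freshly
       decoded pixels of row y+i start at src->data[0] + offset[0] + i*linesize */
    int i;
    for(i=0; i<height; i++)
        memcpy(user_buffer + (y+i)*user_stride,
               src->data[0] + offset[0] + i*src->linesize[0], c->width);
}
#endif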
4019
4020 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4021     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4022     const int uvlinesize= s->current_picture.linesize[1];
4023     const int mb_size= 4 - s->avctx->lowres;
4024
4025     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4026     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4027     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4028     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4029     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4030     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4031     //block_index is not used by mpeg2, so it is not affected by chroma_format
4032
4033     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4034     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4035     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4036
4037     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4038     {
4039         s->dest[0] += s->mb_y *   linesize << mb_size;
4040         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4041         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4042     }
4043 }
4044
4045 #ifdef CONFIG_ENCODERS
4046
4047 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4048     int x, y;
4049 //FIXME optimize
4050     for(y=0; y<8; y++){
4051         for(x=0; x<8; x++){
4052             int x2, y2;
4053             int sum=0;
4054             int sqr=0;
4055             int count=0;
4056
4057             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4058                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4059                     int v= ptr[x2 + y2*stride];
4060                     sum += v;
4061                     sqr += v*v;
4062                     count++;
4063                 }
4064             }
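                 /* count*sqr - sum*sum equals count^2 times the variance of the (up to 3x3)
                  * neighbourhood, so the stored weight is roughly 36 * the local standard
                  * deviation: small in flat areas, large in textured ones */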
4065             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4066         }
4067     }
4068 }
4069
4070 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4071 {
4072     int16_t weight[6][64];
4073     DCTELEM orig[6][64];
4074     const int mb_x= s->mb_x;
4075     const int mb_y= s->mb_y;
4076     int i;
4077     int skip_dct[6];
4078     int dct_offset   = s->linesize*8; //default for progressive frames
4079     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4080     int wrap_y, wrap_c;
4081
4082     for(i=0; i<6; i++) skip_dct[i]=0;
4083
4084     if(s->adaptive_quant){
4085         const int last_qp= s->qscale;
4086         const int mb_xy= mb_x + mb_y*s->mb_stride;
4087
4088         s->lambda= s->lambda_table[mb_xy];
4089         update_qscale(s);
4090
4091         if(!(s->flags&CODEC_FLAG_QP_RD)){
4092             s->dquant= s->qscale - last_qp;
4093
4094             if(s->out_format==FMT_H263){
4095                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
4096
4097                 if(s->codec_id==CODEC_ID_MPEG4){
4098                     if(!s->mb_intra){
4099                         if(s->pict_type == B_TYPE){
4100                             if(s->dquant&1)
4101                                 s->dquant= (s->dquant/2)*2;
4102                             if(s->mv_dir&MV_DIRECT)
4103                                 s->dquant= 0;
4104                         }
4105                         if(s->mv_type==MV_TYPE_8X8)
4106                             s->dquant=0;
4107                     }
4108                 }
4109             }
4110         }
4111         ff_set_qscale(s, last_qp + s->dquant);
4112     }else if(s->flags&CODEC_FLAG_QP_RD)
4113         ff_set_qscale(s, s->qscale + s->dquant);
4114
4115     wrap_y = s->linesize;
4116     wrap_c = s->uvlinesize;
4117     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4118     ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
4119     ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
4120
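     /* macroblocks that extend past the right/bottom picture border are copied into
      * edge_emu_buffer with the border pixels replicated, so the DCT below always reads
      * valid data */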
4121     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4122         uint8_t *ebuf= s->edge_emu_buffer + 32;
4123         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4124         ptr_y= ebuf;
4125         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4126         ptr_cb= ebuf+18*wrap_y;
4127         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4128         ptr_cr= ebuf+18*wrap_y+8;
4129     }
4130
4131     if (s->mb_intra) {
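         /* interlaced DCT decision: compare the cost of coding the two 8-line halves as
          * frame (progressive) blocks against coding the two fields separately and pick the
          * cheaper one; the -400 bias favours progressive when the difference is small */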
4132         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4133             int progressive_score, interlaced_score;
4134
4135             s->interlaced_dct=0;
4136             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4137                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4138
4139             if(progressive_score > 0){
4140                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4141                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4142                 if(progressive_score > interlaced_score){
4143                     s->interlaced_dct=1;
4144
4145                     dct_offset= wrap_y;
4146                     wrap_y<<=1;
4147                 }
4148             }
4149         }
4150
4151         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4152         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4153         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4154         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4155
4156         if(s->flags&CODEC_FLAG_GRAY){
4157             skip_dct[4]= 1;
4158             skip_dct[5]= 1;
4159         }else{
4160             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4161             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4162         }
4163     }else{
4164         op_pixels_func (*op_pix)[4];
4165         qpel_mc_func (*op_qpix)[16];
4166         uint8_t *dest_y, *dest_cb, *dest_cr;
4167
4168         dest_y  = s->dest[0];
4169         dest_cb = s->dest[1];
4170         dest_cr = s->dest[2];
4171
4172         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4173             op_pix = s->dsp.put_pixels_tab;
4174             op_qpix= s->dsp.put_qpel_pixels_tab;
4175         }else{
4176             op_pix = s->dsp.put_no_rnd_pixels_tab;
4177             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4178         }
4179
4180         if (s->mv_dir & MV_DIR_FORWARD) {
4181             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4182             op_pix = s->dsp.avg_pixels_tab;
4183             op_qpix= s->dsp.avg_qpel_pixels_tab;
4184         }
4185         if (s->mv_dir & MV_DIR_BACKWARD) {
4186             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4187         }
4188
4189         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4190             int progressive_score, interlaced_score;
4191
4192             s->interlaced_dct=0;
4193             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4194                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4195
4196             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4197
4198             if(progressive_score>0){
4199                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4200                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4201
4202                 if(progressive_score > interlaced_score){
4203                     s->interlaced_dct=1;
4204
4205                     dct_offset= wrap_y;
4206                     wrap_y<<=1;
4207                 }
4208             }
4209         }
4210
4211         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4212         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4213         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4214         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4215
4216         if(s->flags&CODEC_FLAG_GRAY){
4217             skip_dct[4]= 1;
4218             skip_dct[5]= 1;
4219         }else{
4220             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4221             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4222         }
4223         /* pre quantization */
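         /* skip the DCT of blocks whose motion-compensated residual is negligible relative
          * to the quantizer; a SAD below 20*qscale is treated as "would quantize to all zeros" */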
4224         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4225             //FIXME optimize
4226             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4227             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4228             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4229             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4230             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4231             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4232         }
4233     }
4234
4235     if(s->avctx->quantizer_noise_shaping){
4236         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4237         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4238         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4239         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4240         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4241         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4242         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
4243     }
4244
4245     /* DCT & quantize */
4246     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4247     {
4248         for(i=0;i<6;i++) {
4249             if(!skip_dct[i]){
4250                 int overflow;
4251                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4252             // FIXME we could decide to change the quantizer instead of clipping
4253             // JS: I don't think that would be a good idea, it could lower quality instead
4254             //     of improving it. Just INTRADC clipping deserves changes in the quantizer
4255                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4256             }else
4257                 s->block_last_index[i]= -1;
4258         }
4259         if(s->avctx->quantizer_noise_shaping){
4260             for(i=0;i<6;i++) {
4261                 if(!skip_dct[i]){
4262                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4263                 }
4264             }
4265         }
4266
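         /* coefficient elimination: inter blocks whose remaining coefficients are few and
          * small enough (score below the threshold) are dropped completely, as coding them
          * costs more than the quality they add */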
4267         if(s->luma_elim_threshold && !s->mb_intra)
4268             for(i=0; i<4; i++)
4269                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4270         if(s->chroma_elim_threshold && !s->mb_intra)
4271             for(i=4; i<6; i++)
4272                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4273
4274         if(s->flags & CODEC_FLAG_CBP_RD){
4275             for(i=0;i<6;i++) {
4276                 if(s->block_last_index[i] == -1)
4277                     s->coded_score[i]= INT_MAX/256;
4278             }
4279         }
4280     }
4281
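     /* gray-only mode: intra chroma blocks are replaced by DC-only blocks representing
      * mid-gray (pixel value 128) */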
4282     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4283         s->block_last_index[4]=
4284         s->block_last_index[5]= 0;
4285         s->block[4][0]=
4286         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4287     }
4288
4289     //FIXME: the non-C quantize code returns an incorrect block_last_index
4290     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4291         for(i=0; i<6; i++){
4292             int j;
4293             if(s->block_last_index[i]>0){
4294                 for(j=63; j>0; j--){
4295                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4296                 }
4297                 s->block_last_index[i]= j;
4298             }
4299         }
4300     }
4301
4302     /* huffman encode */
4303     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
4304     case CODEC_ID_MPEG1VIDEO:
4305     case CODEC_ID_MPEG2VIDEO:
4306         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4307     case CODEC_ID_MPEG4:
4308         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4309     case CODEC_ID_MSMPEG4V2:
4310     case CODEC_ID_MSMPEG4V3:
4311     case CODEC_ID_WMV1:
4312         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4313     case CODEC_ID_WMV2:
4314          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4315 #ifdef CONFIG_H261_ENCODER
4316     case CODEC_ID_H261:
4317         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4318 #endif
4319     case CODEC_ID_H263:
4320     case CODEC_ID_H263P:
4321     case CODEC_ID_FLV1:
4322     case CODEC_ID_RV10:
4323     case CODEC_ID_RV20:
4324         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4325     case CODEC_ID_MJPEG:
4326         mjpeg_encode_mb(s, s->block); break;
4327     default:
4328         assert(0);
4329     }
4330 }
4331
4332 #endif //CONFIG_ENCODERS
4333
4334 void ff_mpeg_flush(AVCodecContext *avctx){
4335     int i;
4336     MpegEncContext *s = avctx->priv_data;
4337
4338     if(s==NULL || s->picture==NULL)
4339         return;
4340
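     /* release every internally or externally allocated picture buffer and forget all
      * reference pointers, then reset the parser state */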
4341     for(i=0; i<MAX_PICTURE_COUNT; i++){
4342         if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4343                                      || s->picture[i].type == FF_BUFFER_TYPE_USER))
4344             avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4345     }
4346     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4347
4348     s->mb_x= s->mb_y= 0;
4349
4350     s->parse_context.state= -1;
4351     s->parse_context.frame_start_found= 0;
4352     s->parse_context.overread= 0;
4353     s->parse_context.overread_index= 0;
4354     s->parse_context.index= 0;
4355     s->parse_context.last_index= 0;
4356     s->bitstream_buffer_size=0;
4357 }
4358
4359 #ifdef CONFIG_ENCODERS
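/* copy 'length' bits from src into pb: short or unaligned copies go word by word through
 * put_bits(), while long byte-aligned runs are flushed and then block-copied with memcpy */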
4360 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4361 {
4362     const uint16_t *srcw= (uint16_t*)src;
4363     int words= length>>4;
4364     int bits= length&15;
4365     int i;
4366
4367     if(length==0) return;
4368
4369     if(words < 16){
4370         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4371     }else if(put_bits_count(pb)&7){
4372         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4373     }else{
4374         for(i=0; put_bits_count(pb)&31; i++)
4375             put_bits(pb, 8, src[i]);
4376         flush_put_bits(pb);
4377         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4378         skip_put_bytes(pb, 2*words-i);
4379     }
4380
4381     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4382 }
4383
4384 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4385     int i;
4386
4387     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4388
4389     /* mpeg1 */
4390     d->mb_skip_run= s->mb_skip_run;
4391     for(i=0; i<3; i++)
4392         d->last_dc[i]= s->last_dc[i];
4393
4394     /* statistics */
4395     d->mv_bits= s->mv_bits;
4396     d->i_tex_bits= s->i_tex_bits;
4397     d->p_tex_bits= s->p_tex_bits;
4398     d->i_count= s->i_count;
4399     d->f_count= s->f_count;
4400     d->b_count= s->b_count;
4401     d->skip_count= s->skip_count;
4402     d->misc_bits= s->misc_bits;
4403     d->last_bits= 0;
4404
4405     d->mb_skipped= 0;
4406     d->qscale= s->qscale;
4407     d->dquant= s->dquant;
4408 }
4409
4410 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4411     int i;
4412
4413     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4414     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4415
4416     /* mpeg1 */
4417     d->mb_skip_run= s->mb_skip_run;
4418     for(i=0; i<3; i++)
4419         d->last_dc[i]= s->last_dc[i];
4420
4421     /* statistics */
4422     d->mv_bits= s->mv_bits;
4423     d->i_tex_bits= s->i_tex_bits;
4424     d->p_tex_bits= s->p_tex_bits;
4425     d->i_count= s->i_count;
4426     d->f_count= s->f_count;
4427     d->b_count= s->b_count;
4428     d->skip_count= s->skip_count;
4429     d->misc_bits= s->misc_bits;
4430
4431     d->mb_intra= s->mb_intra;
4432     d->mb_skipped= s->mb_skipped;
4433     d->mv_type= s->mv_type;
4434     d->mv_dir= s->mv_dir;
4435     d->pb= s->pb;
4436     if(s->data_partitioning){
4437         d->pb2= s->pb2;
4438         d->tex_pb= s->tex_pb;
4439     }
4440     d->block= s->block;
4441     for(i=0; i<6; i++)
4442         d->block_last_index[i]= s->block_last_index[i];
4443     d->interlaced_dct= s->interlaced_dct;
4444     d->qscale= s->qscale;
4445 }
4446
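/* encode the current macroblock with the given candidate type into one of two scratch
 * bitstreams, score it (bit count, or bits*lambda2 + SSE when mb_decision is RD), and keep
 * it as the new best if the score beats *dmin */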
4447 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
4448                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4449                            int *dmin, int *next_block, int motion_x, int motion_y)
4450 {
4451     int score;
4452     uint8_t *dest_backup[3];
4453
4454     copy_context_before_encode(s, backup, type);
4455
4456     s->block= s->blocks[*next_block];
4457     s->pb= pb[*next_block];
4458     if(s->data_partitioning){
4459         s->pb2   = pb2   [*next_block];
4460         s->tex_pb= tex_pb[*next_block];
4461     }
4462
4463     if(*next_block){
4464         memcpy(dest_backup, s->dest, sizeof(s->dest));
4465         s->dest[0] = s->rd_scratchpad;
4466         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4467         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4468         assert(s->linesize >= 32); //FIXME
4469     }
4470
4471     encode_mb(s, motion_x, motion_y);
4472
4473     score= put_bits_count(&s->pb);
4474     if(s->data_partitioning){
4475         score+= put_bits_count(&s->pb2);
4476         score+= put_bits_count(&s->tex_pb);
4477     }
4478
4479     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4480         MPV_decode_mb(s, s->block);
4481
4482         score *= s->lambda2;
4483         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4484     }
4485
4486     if(*next_block){
4487         memcpy(s->dest, dest_backup, sizeof(s->dest));
4488     }
4489
4490     if(score<*dmin){
4491         *dmin= score;
4492         *next_block^=1;
4493
4494         copy_context_after_encode(best, s, type);
4495     }
4496 }
4497
4498 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4499     uint32_t *sq = squareTbl + 256;
4500     int acc=0;
4501     int x,y;
4502
4503     if(w==16 && h==16)
4504         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4505     else if(w==8 && h==8)
4506         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4507
4508     for(y=0; y<h; y++){
4509         for(x=0; x<w; x++){
4510             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4511         }
4512     }
4513
4514     assert(acc>=0);
4515
4516     return acc;
4517 }
4518
4519 static int sse_mb(MpegEncContext *s){
4520     int w= 16;
4521     int h= 16;
4522
4523     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4524     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4525
4526     if(w==16 && h==16)
4527       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4528         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4529                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4530                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4531       }else{
4532         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4533                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4534                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4535       }
4536     else
4537         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4538                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4539                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4540 }
4541
4542 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4543     MpegEncContext *s= arg;
4544
4545
4546     s->me.pre_pass=1;
4547     s->me.dia_size= s->avctx->pre_dia_size;
4548     s->first_slice_line=1;
4549     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4550         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4551             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4552         }
4553         s->first_slice_line=0;
4554     }
4555
4556     s->me.pre_pass=0;
4557
4558     return 0;
4559 }
4560
4561 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4562     MpegEncContext *s= arg;
4563
4564     s->me.dia_size= s->avctx->dia_size;
4565     s->first_slice_line=1;
4566     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4567         s->mb_x=0; //for block init below
4568         ff_init_block_index(s);
4569         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4570             s->block_index[0]+=2;
4571             s->block_index[1]+=2;
4572             s->block_index[2]+=2;
4573             s->block_index[3]+=2;
4574
4575             /* compute motion vector & mb_type and store in context */
4576             if(s->pict_type==B_TYPE)
4577                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4578             else
4579                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4580         }
4581         s->first_slice_line=0;
4582     }
4583     return 0;
4584 }
4585
4586 static int mb_var_thread(AVCodecContext *c, void *arg){
4587     MpegEncContext *s= arg;
4588     int mb_x, mb_y;
4589
4590     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4591         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4592             int xx = mb_x * 16;
4593             int yy = mb_y * 16;
4594             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4595             int varc;
4596             int sum = s->dsp.pix_sum(pix, s->linesize);
4597
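             /* variance of the 16x16 luma block: (sum of squares - sum^2/256) / 256,
              * computed with rounding and a small constant bias */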
4598             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4599
4600             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4601             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4602             s->me.mb_var_sum_temp    += varc;
4603         }
4604     }
4605     return 0;
4606 }
4607
4608 static void write_slice_end(MpegEncContext *s){
4609     if(s->codec_id==CODEC_ID_MPEG4){
4610         if(s->partitioned_frame){
4611             ff_mpeg4_merge_partitions(s);
4612         }
4613
4614         ff_mpeg4_stuffing(&s->pb);
4615     }else if(s->out_format == FMT_MJPEG){
4616         ff_mjpeg_stuffing(&s->pb);
4617     }
4618
4619     align_put_bits(&s->pb);
4620     flush_put_bits(&s->pb);
4621
4622     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4623         s->misc_bits+= get_bits_diff(s);
4624 }
4625
4626 static int encode_thread(AVCodecContext *c, void *arg){
4627     MpegEncContext *s= arg;
4628     int mb_x, mb_y, pdif = 0;
4629     int i, j;
4630     MpegEncContext best_s, backup_s;
4631     uint8_t bit_buf[2][MAX_MB_BYTES];
4632     uint8_t bit_buf2[2][MAX_MB_BYTES];
4633     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4634     PutBitContext pb[2], pb2[2], tex_pb[2];
4635 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4636
4637     for(i=0; i<2; i++){
4638         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4639         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4640         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4641     }
4642
4643     s->last_bits= put_bits_count(&s->pb);
4644     s->mv_bits=0;
4645     s->misc_bits=0;
4646     s->i_tex_bits=0;
4647     s->p_tex_bits=0;
4648     s->i_count=0;
4649     s->f_count=0;
4650     s->b_count=0;
4651     s->skip_count=0;
4652
4653     for(i=0; i<3; i++){
4654         /* init last dc values */
4655         /* note: quant matrix value (8) is implied here */
4656         s->last_dc[i] = 128 << s->intra_dc_precision;
4657
4658         s->current_picture.error[i] = 0;
4659     }
4660     s->mb_skip_run = 0;
4661     memset(s->last_mv, 0, sizeof(s->last_mv));
4662
4663     s->last_mv_dir = 0;
4664
4665     switch(s->codec_id){
4666     case CODEC_ID_H263:
4667     case CODEC_ID_H263P:
4668     case CODEC_ID_FLV1:
4669         s->gob_index = ff_h263_get_gob_height(s);
4670         break;
4671     case CODEC_ID_MPEG4:
4672         if(s->partitioned_frame)
4673             ff_mpeg4_init_partitions(s);
4674         break;
4675     }
4676
4677     s->resync_mb_x=0;
4678     s->resync_mb_y=0;
4679     s->first_slice_line = 1;
4680     s->ptr_lastgob = s->pb.buf;
4681     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4682 //    printf("row %d at %X\n", s->mb_y, (int)s);
4683         s->mb_x=0;
4684         s->mb_y= mb_y;
4685
4686         ff_set_qscale(s, s->qscale);
4687         ff_init_block_index(s);
4688
4689         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4690             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4691             int mb_type= s->mb_type[xy];
4692 //            int d;
4693             int dmin= INT_MAX;
4694             int dir;
4695
4696             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4697                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4698                 return -1;
4699             }
4700             if(s->data_partitioning){
4701                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4702                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4703                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4704                     return -1;
4705                 }
4706             }
4707
4708             s->mb_x = mb_x;
4709             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4710             ff_update_block_index(s);
4711
4712 #ifdef CONFIG_H261_ENCODER
4713             if(s->codec_id == CODEC_ID_H261){
4714                 ff_h261_reorder_mb_index(s);
4715                 xy= s->mb_y*s->mb_stride + s->mb_x;
4716                 mb_type= s->mb_type[xy];
4717             }
4718 #endif
4719
4720             /* write gob / video packet header  */
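             /* start a new GOB / slice / video packet once the current packet has grown to
              * rtp_payload_size bytes, subject to the codec-specific restrictions below */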
4721             if(s->rtp_mode){
4722                 int current_packet_size, is_gob_start;
4723
4724                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4725
4726                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4727
4728                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4729
4730                 switch(s->codec_id){
4731                 case CODEC_ID_H263:
4732                 case CODEC_ID_H263P:
4733                     if(!s->h263_slice_structured)
4734                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4735                     break;
4736                 case CODEC_ID_MPEG2VIDEO:
4737                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1; /* fall through */
4738                 case CODEC_ID_MPEG1VIDEO:
4739                     if(s->mb_skip_run) is_gob_start=0;
4740                     break;
4741                 }
4742
4743                 if(is_gob_start){
4744                     if(s->start_mb_y != mb_y || mb_x!=0){
4745                         write_slice_end(s);
4746
4747                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4748                             ff_mpeg4_init_partitions(s);
4749                         }
4750                     }
4751
4752                     assert((put_bits_count(&s->pb)&7) == 0);
4753                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4754
4755                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4756                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
4757                         int d= 100 / s->avctx->error_rate;
4758                         if(r % d == 0){
4759                             current_packet_size=0;
4760 #ifndef ALT_BITSTREAM_WRITER
4761                             s->pb.buf_ptr= s->ptr_lastgob;
4762 #endif
4763                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4764                         }
4765                     }
4766
4767                     if (s->avctx->rtp_callback){
4768                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
4769                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
4770                     }
4771
4772                     switch(s->codec_id){
4773                     case CODEC_ID_MPEG4:
4774                         ff_mpeg4_encode_video_packet_header(s);
4775                         ff_mpeg4_clean_buffers(s);
4776                     break;
4777                     case CODEC_ID_MPEG1VIDEO:
4778                     case CODEC_ID_MPEG2VIDEO:
4779                         ff_mpeg1_encode_slice_header(s);
4780                         ff_mpeg1_clean_buffers(s);
4781                     break;
4782                     case CODEC_ID_H263:
4783                     case CODEC_ID_H263P:
4784                         h263_encode_gob_header(s, mb_y);
4785                     break;
4786                     }
4787
4788                     if(s->flags&CODEC_FLAG_PASS1){
4789                         int bits= put_bits_count(&s->pb);
4790                         s->misc_bits+= bits - s->last_bits;
4791                         s->last_bits= bits;
4792                     }
4793
4794                     s->ptr_lastgob += current_packet_size;
4795                     s->first_slice_line=1;
4796                     s->resync_mb_x=mb_x;
4797                     s->resync_mb_y=mb_y;
4798                 }
4799             }
4800
4801             if(  (s->resync_mb_x   == s->mb_x)
4802                && s->resync_mb_y+1 == s->mb_y){
4803                 s->first_slice_line=0;
4804             }
4805
4806             s->mb_skipped=0;
4807             s->dquant=0; //only for QP_RD
4808
4809             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
4810                 int next_block=0;
4811                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4812
4813                 copy_context_before_encode(&backup_s, s, -1);
4814                 backup_s.pb= s->pb;
4815                 best_s.data_partitioning= s->data_partitioning;
4816                 best_s.partitioned_frame= s->partitioned_frame;
4817                 if(s->data_partitioning){
4818                     backup_s.pb2= s->pb2;
4819                     backup_s.tex_pb= s->tex_pb;
4820                 }
4821
4822                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
4823                     s->mv_dir = MV_DIR_FORWARD;
4824                     s->mv_type = MV_TYPE_16X16;
4825                     s->mb_intra= 0;
4826                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4827                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4828                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
4829                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4830                 }
4831                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
4832                     s->mv_dir = MV_DIR_FORWARD;
4833                     s->mv_type = MV_TYPE_FIELD;
4834                     s->mb_intra= 0;
4835                     for(i=0; i<2; i++){
4836                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4837                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4838                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4839                     }
4840                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
4841                                  &dmin, &next_block, 0, 0);
4842                 }
4843                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
4844                     s->mv_dir = MV_DIR_FORWARD;
4845                     s->mv_type = MV_TYPE_16X16;
4846                     s->mb_intra= 0;
4847                     s->mv[0][0][0] = 0;
4848                     s->mv[0][0][1] = 0;
4849                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
4850                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4851                 }
4852                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
4853                     s->mv_dir = MV_DIR_FORWARD;
4854                     s->mv_type = MV_TYPE_8X8;
4855                     s->mb_intra= 0;
4856                     for(i=0; i<4; i++){
4857                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4858                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4859                     }
4860                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
4861                                  &dmin, &next_block, 0, 0);
4862                 }
4863                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
4864                     s->mv_dir = MV_DIR_FORWARD;
4865                     s->mv_type = MV_TYPE_16X16;
4866                     s->mb_intra= 0;
4867                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4868                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4869                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
4870                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4871                 }
4872                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
4873                     s->mv_dir = MV_DIR_BACKWARD;
4874                     s->mv_type = MV_TYPE_16X16;
4875                     s->mb_intra= 0;
4876                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4877                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4878                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
4879                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
4880                 }
4881                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
4882                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4883                     s->mv_type = MV_TYPE_16X16;
4884                     s->mb_intra= 0;
4885                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4886                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4887                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4888                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4889                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
4890                                  &dmin, &next_block, 0, 0);
4891                 }
4892                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
4893                     int mx= s->b_direct_mv_table[xy][0];
4894                     int my= s->b_direct_mv_table[xy][1];
4895
4896                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4897                     s->mb_intra= 0;
4898                     ff_mpeg4_set_direct_mv(s, mx, my);
4899                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
4900                                  &dmin, &next_block, mx, my);
4901                 }
4902                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
4903                     s->mv_dir = MV_DIR_FORWARD;
4904                     s->mv_type = MV_TYPE_FIELD;
4905                     s->mb_intra= 0;
4906                     for(i=0; i<2; i++){
4907                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
4908                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
4909                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
4910                     }
4911                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
4912                                  &dmin, &next_block, 0, 0);
4913                 }
4914                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
4915                     s->mv_dir = MV_DIR_BACKWARD;
4916                     s->mv_type = MV_TYPE_FIELD;
4917                     s->mb_intra= 0;
4918                     for(i=0; i<2; i++){
4919                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
4920                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
4921                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
4922                     }
4923                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
4924                                  &dmin, &next_block, 0, 0);
4925                 }
4926                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
4927                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4928                     s->mv_type = MV_TYPE_FIELD;
4929                     s->mb_intra= 0;
4930                     for(dir=0; dir<2; dir++){
4931                         for(i=0; i<2; i++){
4932                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
4933                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
4934                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
4935                         }
4936                     }
4937                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
4938                                  &dmin, &next_block, 0, 0);
4939                 }
4940                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
4941                     s->mv_dir = 0;
4942                     s->mv_type = MV_TYPE_16X16;
4943                     s->mb_intra= 1;
4944                     s->mv[0][0][0] = 0;
4945                     s->mv[0][0][1] = 0;
4946                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
4947                                  &dmin, &next_block, 0, 0);
4948                     if(s->h263_pred || s->h263_aic){
4949                         if(best_s.mb_intra)
4950                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
4951                         else
4952                             ff_clean_intra_table_entries(s); //old mode?
4953                     }
4954                 }
4955
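                 /* QP_RD: re-try the best 16x16 mode with neighbouring quantizers
                  * (dquant within -2..+2 and qmin..qmax) and keep the qscale with the
                  * lowest rate-distortion score */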
4956                 if(s->flags & CODEC_FLAG_QP_RD){
4957                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
4958                         const int last_qp= backup_s.qscale;
4959                         int dquant, dir, qp, dc[6];
4960                         DCTELEM ac[6][16];
4961                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
4962
4963                         assert(backup_s.dquant == 0);
4964
4965                         //FIXME intra
4966                         s->mv_dir= best_s.mv_dir;
4967                         s->mv_type = MV_TYPE_16X16;
4968                         s->mb_intra= best_s.mb_intra;
4969                         s->mv[0][0][0] = best_s.mv[0][0][0];
4970                         s->mv[0][0][1] = best_s.mv[0][0][1];
4971                         s->mv[1][0][0] = best_s.mv[1][0][0];
4972                         s->mv[1][0][1] = best_s.mv[1][0][1];
4973
4974                         dir= s->pict_type == B_TYPE ? 2 : 1;
4975                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
4976                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
4977                             qp= last_qp + dquant;
4978                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
4979                                 break;
4980                             backup_s.dquant= dquant;
4981                             if(s->mb_intra && s->dc_val[0]){
4982                                 for(i=0; i<6; i++){
4983                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
4984                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
4985                                 }
4986                             }
4987
4988                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
4989                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
4990                             if(best_s.qscale != qp){
4991                                 if(s->mb_intra && s->dc_val[0]){
4992                                     for(i=0; i<6; i++){
4993                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
4994                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
4995                                     }
4996                                 }
4997                                 if(dir > 0 && dquant==dir){
4998                                     dquant= 0;
4999                                     dir= -dir;
5000                                 }else
5001                                     break;
5002                             }
5003                         }
5004                         qp= best_s.qscale;
5005                         s->current_picture.qscale_table[xy]= qp;
5006                     }
5007                 }
5008
5009                 copy_context_after_encode(s, &best_s, -1);
5010
5011                 pb_bits_count= put_bits_count(&s->pb);
5012                 flush_put_bits(&s->pb);
5013                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5014                 s->pb= backup_s.pb;
5015
5016                 if(s->data_partitioning){
5017                     pb2_bits_count= put_bits_count(&s->pb2);
5018                     flush_put_bits(&s->pb2);
5019                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5020                     s->pb2= backup_s.pb2;
5021
5022                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5023                     flush_put_bits(&s->tex_pb);
5024                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5025                     s->tex_pb= backup_s.tex_pb;
5026                 }
5027                 s->last_bits= put_bits_count(&s->pb);
5028
5029                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5030                     ff_h263_update_motion_val(s);
5031
5032                 if(next_block==0){ //FIXME 16 vs linesize16
5033                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5034                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5035                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5036                 }
5037
5038                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5039                     MPV_decode_mb(s, s->block);
5040             } else {
5041                 int motion_x, motion_y;
5042                 s->mv_type=MV_TYPE_16X16;
5043                 // only one MB-Type possible
5044
5045                 switch(mb_type){
5046                 case CANDIDATE_MB_TYPE_INTRA:
5047                     s->mv_dir = 0;
5048                     s->mb_intra= 1;
5049                     motion_x= s->mv[0][0][0] = 0;
5050                     motion_y= s->mv[0][0][1] = 0;
5051                     break;
5052                 case CANDIDATE_MB_TYPE_INTER:
5053                     s->mv_dir = MV_DIR_FORWARD;
5054                     s->mb_intra= 0;
5055                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5056                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5057                     break;
5058                 case CANDIDATE_MB_TYPE_INTER_I:
5059                     s->mv_dir = MV_DIR_FORWARD;
5060                     s->mv_type = MV_TYPE_FIELD;
5061                     s->mb_intra= 0;
5062                     for(i=0; i<2; i++){
5063                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5064                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5065                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5066                     }
5067                     motion_x = motion_y = 0;
5068                     break;
5069                 case CANDIDATE_MB_TYPE_INTER4V:
5070                     s->mv_dir = MV_DIR_FORWARD;
5071                     s->mv_type = MV_TYPE_8X8;
5072                     s->mb_intra= 0;
5073                     for(i=0; i<4; i++){
5074                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5075                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5076                     }
5077                     motion_x= motion_y= 0;
5078                     break;
5079                 case CANDIDATE_MB_TYPE_DIRECT:
5080                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5081                     s->mb_intra= 0;
5082                     motion_x=s->b_direct_mv_table[xy][0];
5083                     motion_y=s->b_direct_mv_table[xy][1];
5084                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5085                     break;
5086                 case CANDIDATE_MB_TYPE_BIDIR:
5087                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5088                     s->mb_intra= 0;
5089                     motion_x=0;
5090                     motion_y=0;
5091                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5092                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5093                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5094                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5095                     break;
5096                 case CANDIDATE_MB_TYPE_BACKWARD:
5097                     s->mv_dir = MV_DIR_BACKWARD;
5098                     s->mb_intra= 0;
5099                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5100                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5101                     break;
5102                 case CANDIDATE_MB_TYPE_FORWARD:
5103                     s->mv_dir = MV_DIR_FORWARD;
5104                     s->mb_intra= 0;
5105                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5106                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5107 //                    printf(" %d %d ", motion_x, motion_y);
5108                     break;
5109                 case CANDIDATE_MB_TYPE_FORWARD_I:
5110                     s->mv_dir = MV_DIR_FORWARD;
5111                     s->mv_type = MV_TYPE_FIELD;
5112                     s->mb_intra= 0;
5113                     for(i=0; i<2; i++){
5114                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5115                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5116                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5117                     }
5118                     motion_x=motion_y=0;
5119                     break;
5120                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5121                     s->mv_dir = MV_DIR_BACKWARD;
5122                     s->mv_type = MV_TYPE_FIELD;
5123                     s->mb_intra= 0;
5124                     for(i=0; i<2; i++){
5125                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5126                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5127                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5128                     }
5129                     motion_x=motion_y=0;
5130                     break;
5131                 case CANDIDATE_MB_TYPE_BIDIR_I:
5132                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5133                     s->mv_type = MV_TYPE_FIELD;
5134                     s->mb_intra= 0;
5135                     for(dir=0; dir<2; dir++){
5136                         for(i=0; i<2; i++){
5137                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5138                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5139                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5140                         }
5141                     }
5142                     motion_x=motion_y=0;
5143                     break;
5144                 default:
5145                     motion_x=motion_y=0; //gcc warning fix
5146                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5147                 }
5148
5149                 encode_mb(s, motion_x, motion_y);
5150
5151                 // RAL: Update last macroblock type
5152                 s->last_mv_dir = s->mv_dir;
5153
5154                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5155                     ff_h263_update_motion_val(s);
5156
5157                 MPV_decode_mb(s, s->block);
5158             }
5159
5160             /* clean the MV table in I/P/S frames for direct mode in B-frames */
5161             if(s->mb_intra /* && I,P,S_TYPE */){
5162                 s->p_mv_table[xy][0]=0;
5163                 s->p_mv_table[xy][1]=0;
5164             }
5165
5166             if(s->flags&CODEC_FLAG_PSNR){
5167                 int w= 16;
5168                 int h= 16;
5169
5170                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5171                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5172
5173                 s->current_picture.error[0] += sse(
5174                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5175                     s->dest[0], w, h, s->linesize);
5176                 s->current_picture.error[1] += sse(
5177                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5178                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5179                 s->current_picture.error[2] += sse(
5180                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5181                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5182             }
5183             if(s->loop_filter){
5184                 if(s->out_format == FMT_H263)
5185                     ff_h263_loop_filter(s);
5186             }
5187 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5188         }
5189     }
5190
5191     //not beautiful, but this must be written before flushing, so it has to be here
5192     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5193         msmpeg4_encode_ext_header(s);
5194
5195     write_slice_end(s);
5196
5197     /* Send the last GOB if RTP */
5198     if (s->avctx->rtp_callback) {
5199         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5200         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5201         /* Call the RTP callback to send the last GOB */
5202         emms_c();
5203         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5204     }
5205
5206     return 0;
5207 }
5208
5209 #define MERGE(field) dst->field += src->field; src->field=0
5210 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5211     MERGE(me.scene_change_score);
5212     MERGE(me.mc_mb_var_sum_temp);
5213     MERGE(me.mb_var_sum_temp);
5214 }
5215
5216 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5217     int i;
5218
5219     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5220     MERGE(dct_count[1]);
5221     MERGE(mv_bits);
5222     MERGE(i_tex_bits);
5223     MERGE(p_tex_bits);
5224     MERGE(i_count);
5225     MERGE(f_count);
5226     MERGE(b_count);
5227     MERGE(skip_count);
5228     MERGE(misc_bits);
5229     MERGE(error_count);
5230     MERGE(padding_bug_score);
5231     MERGE(current_picture.error[0]);
5232     MERGE(current_picture.error[1]);
5233     MERGE(current_picture.error[2]);
5234
5235     if(dst->avctx->noise_reduction){
5236         for(i=0; i<64; i++){
5237             MERGE(dct_error_sum[0][i]);
5238             MERGE(dct_error_sum[1][i]);
5239         }
5240     }
5241
5242     assert(put_bits_count(&src->pb) % 8 ==0);
5243     assert(put_bits_count(&dst->pb) % 8 ==0);
5244     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5245     flush_put_bits(&dst->pb);
5246 }
5247
5248 static void estimate_qp(MpegEncContext *s, int dry_run){
5249     if (!s->fixed_qscale)
5250         s->current_picture_ptr->quality=
5251         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5252
5253     if(s->adaptive_quant){
5254         switch(s->codec_id){
5255         case CODEC_ID_MPEG4:
5256             ff_clean_mpeg4_qscales(s);
5257             break;
5258         case CODEC_ID_H263:
5259         case CODEC_ID_H263P:
5260         case CODEC_ID_FLV1:
5261             ff_clean_h263_qscales(s);
5262             break;
5263         }
5264
5265         s->lambda= s->lambda_table[0];
5266         //FIXME broken
5267     }else
5268         s->lambda= s->current_picture.quality;
5269 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5270     update_qscale(s);
5271 }
5272
5273 static void encode_picture(MpegEncContext *s, int picture_number)
5274 {
5275     int i;
5276     int bits;
5277
5278     s->picture_number = picture_number;
5279
5280     /* Reset the average MB variance */
5281     s->me.mb_var_sum_temp    =
5282     s->me.mc_mb_var_sum_temp = 0;
5283
5284     /* we need to initialize some time vars before we can encode b-frames */
5285     // RAL: Condition added for MPEG1VIDEO
5286     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5287         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5288
5289     s->me.scene_change_score=0;
5290
5291 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
5292
5293     if(s->pict_type==I_TYPE){
5294         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5295         else                        s->no_rounding=0;
5296     }else if(s->pict_type!=B_TYPE){
5297         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5298             s->no_rounding ^= 1;
5299     }
5300
5301     if(s->flags & CODEC_FLAG_PASS2)
5302         estimate_qp(s, 1);
5303
5304
5305     s->mb_intra=0; //for the rate distortion & bit compare functions
5306     for(i=1; i<s->avctx->thread_count; i++){
5307         ff_update_duplicate_context(s->thread_context[i], s);
5308     }
5309
5310     ff_init_me(s);
5311
5312     /* Estimate motion for every MB */
5313     if(s->pict_type != I_TYPE){
5314         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
5315         s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
5316         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5317             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5318                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5319             }
5320         }
5321
5322         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5323     }else /* if(s->pict_type == I_TYPE) */{
5324         /* I-Frame */
5325         for(i=0; i<s->mb_stride*s->mb_height; i++)
5326             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5327
5328         if(!s->fixed_qscale){
5329             /* finding spatial complexity for I-frame rate control */
5330             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5331         }
5332     }
5333     for(i=1; i<s->avctx->thread_count; i++){
5334         merge_context_after_me(s, s->thread_context[i]);
5335     }
5336     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5337     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5338     emms_c();
5339
5340     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5341         s->pict_type= I_TYPE;
5342         for(i=0; i<s->mb_stride*s->mb_height; i++)
5343             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5344 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5345     }
5346
5347     if(!s->umvplus){
5348         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5349             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5350
5351             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5352                 int a,b;
5353                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5354                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5355                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5356             }
5357
5358             ff_fix_long_p_mvs(s);
5359             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5360             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5361                 int j;
5362                 for(i=0; i<2; i++){
5363                     for(j=0; j<2; j++)
5364                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
5365                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5366                 }
5367             }
5368         }
5369
5370         if(s->pict_type==B_TYPE){
5371             int a, b;
5372
5373             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5374             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5375             s->f_code = FFMAX(a, b);
5376
5377             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5378             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5379             s->b_code = FFMAX(a, b);
5380
5381             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5382             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5383             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5384             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5385             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5386                 int dir, j;
5387                 for(dir=0; dir<2; dir++){
5388                     for(i=0; i<2; i++){
5389                         for(j=0; j<2; j++){
5390                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
5391                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5392                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
5393                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5394                         }
5395                     }
5396                 }
5397             }
5398         }
5399     }
5400
5401     estimate_qp(s, 0);
5402
5403     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
5404         s->qscale= 3; //reduce clipping problems
5405
5406     if (s->out_format == FMT_MJPEG) {
5407         /* for mjpeg, we do include qscale in the matrix */
5408         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5409         for(i=1;i<64;i++){
5410             int j= s->dsp.idct_permutation[i];
5411
5412             s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3) & 0xFF;
5413         }
5414         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
5415                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5416         s->qscale= 8;
5417     }
5418
5419     //FIXME var duplication
5420     s->current_picture_ptr->key_frame=
5421     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5422     s->current_picture_ptr->pict_type=
5423     s->current_picture.pict_type= s->pict_type;
5424
5425     if(s->current_picture.key_frame)
5426         s->picture_in_gop_number=0;
5427
5428     s->last_bits= put_bits_count(&s->pb);
5429     switch(s->out_format) {
5430     case FMT_MJPEG:
5431         mjpeg_picture_header(s);
5432         break;
5433 #ifdef CONFIG_H261_ENCODER
5434     case FMT_H261:
5435         ff_h261_encode_picture_header(s, picture_number);
5436         break;
5437 #endif
5438     case FMT_H263:
5439         if (s->codec_id == CODEC_ID_WMV2)
5440             ff_wmv2_encode_picture_header(s, picture_number);
5441         else if (s->h263_msmpeg4)
5442             msmpeg4_encode_picture_header(s, picture_number);
5443         else if (s->h263_pred)
5444             mpeg4_encode_picture_header(s, picture_number);
5445 #ifdef CONFIG_RV10_ENCODER
5446         else if (s->codec_id == CODEC_ID_RV10)
5447             rv10_encode_picture_header(s, picture_number);
5448 #endif
5449 #ifdef CONFIG_RV20_ENCODER
5450         else if (s->codec_id == CODEC_ID_RV20)
5451             rv20_encode_picture_header(s, picture_number);
5452 #endif
5453         else if (s->codec_id == CODEC_ID_FLV1)
5454             ff_flv_encode_picture_header(s, picture_number);
5455         else
5456             h263_encode_picture_header(s, picture_number);
5457         break;
5458     case FMT_MPEG1:
5459         mpeg1_encode_picture_header(s, picture_number);
5460         break;
5461     case FMT_H264:
5462         break;
5463     default:
5464         assert(0);
5465     }
5466     bits= put_bits_count(&s->pb);
5467     s->header_bits= bits - s->last_bits;
5468
5469     for(i=1; i<s->avctx->thread_count; i++){
5470         update_duplicate_context_after_me(s->thread_context[i], s);
5471     }
5472     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5473     for(i=1; i<s->avctx->thread_count; i++){
5474         merge_context_after_encode(s, s->thread_context[i]);
5475     }
5476     emms_c();
5477 }
5478
5479 #endif //CONFIG_ENCODERS
5480
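/**
 * DCT-domain noise reduction: accumulates the magnitude of each coefficient in
 * dct_error_sum and shrinks the coefficient towards zero by the running
 * dct_offset for the current (intra/inter) block type.
 */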
5481 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5482     const int intra= s->mb_intra;
5483     int i;
5484
5485     s->dct_count[intra]++;
5486
5487     for(i=0; i<64; i++){
5488         int level= block[i];
5489
5490         if(level){
5491             if(level>0){
5492                 s->dct_error_sum[intra][i] += level;
5493                 level -= s->dct_offset[intra][i];
5494                 if(level<0) level=0;
5495             }else{
5496                 s->dct_error_sum[intra][i] -= level;
5497                 level += s->dct_offset[intra][i];
5498                 if(level>0) level=0;
5499             }
5500             block[i]= level;
5501         }
5502     }
5503 }
5504
5505 #ifdef CONFIG_ENCODERS
5506
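/**
 * Rate-distortion optimized ("trellis") quantization of one 8x8 block.
 * For every coefficient along the scan it evaluates candidate levels, keeps a
 * set of surviving run/level paths scored with the VLC length tables and
 * lambda, and finally writes back the best run/level sequence.
 * @return index of the last non-zero coefficient, as for dct_quantize_c()
 */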
5507 static int dct_quantize_trellis_c(MpegEncContext *s,
5508                         DCTELEM *block, int n,
5509                         int qscale, int *overflow){
5510     const int *qmat;
5511     const uint8_t *scantable= s->intra_scantable.scantable;
5512     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5513     int max=0;
5514     unsigned int threshold1, threshold2;
5515     int bias=0;
5516     int run_tab[65];
5517     int level_tab[65];
5518     int score_tab[65];
5519     int survivor[65];
5520     int survivor_count;
5521     int last_run=0;
5522     int last_level=0;
5523     int last_score= 0;
5524     int last_i;
5525     int coeff[2][64];
5526     int coeff_count[64];
5527     int qmul, qadd, start_i, last_non_zero, i, dc;
5528     const int esc_length= s->ac_esc_length;
5529     uint8_t * length;
5530     uint8_t * last_length;
5531     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5532
5533     s->dsp.fdct (block);
5534
5535     if(s->dct_error_sum)
5536         s->denoise_dct(s, block);
5537     qmul= qscale*16;
5538     qadd= ((qscale-1)|1)*8;
5539
5540     if (s->mb_intra) {
5541         int q;
5542         if (!s->h263_aic) {
5543             if (n < 4)
5544                 q = s->y_dc_scale;
5545             else
5546                 q = s->c_dc_scale;
5547             q = q << 3;
5548         } else{
5549             /* For AIC we skip quant/dequant of INTRADC */
5550             q = 1 << 3;
5551             qadd=0;
5552         }
5553
5554         /* note: block[0] is assumed to be positive */
5555         block[0] = (block[0] + (q >> 1)) / q;
5556         start_i = 1;
5557         last_non_zero = 0;
5558         qmat = s->q_intra_matrix[qscale];
5559         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5560             bias= 1<<(QMAT_SHIFT-1);
5561         length     = s->intra_ac_vlc_length;
5562         last_length= s->intra_ac_vlc_last_length;
5563     } else {
5564         start_i = 0;
5565         last_non_zero = -1;
5566         qmat = s->q_inter_matrix[qscale];
5567         length     = s->inter_ac_vlc_length;
5568         last_length= s->inter_ac_vlc_last_length;
5569     }
5570     last_i= start_i;
5571
5572     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5573     threshold2= (threshold1<<1);
5574
5575     for(i=63; i>=start_i; i--) {
5576         const int j = scantable[i];
5577         int level = block[j] * qmat[j];
5578
5579         if(((unsigned)(level+threshold1))>threshold2){
5580             last_non_zero = i;
5581             break;
5582         }
5583     }
5584
5585     for(i=start_i; i<=last_non_zero; i++) {
5586         const int j = scantable[i];
5587         int level = block[j] * qmat[j];
5588
5589 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5590 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5591         if(((unsigned)(level+threshold1))>threshold2){
5592             if(level>0){
5593                 level= (bias + level)>>QMAT_SHIFT;
5594                 coeff[0][i]= level;
5595                 coeff[1][i]= level-1;
5596 //                coeff[2][k]= level-2;
5597             }else{
5598                 level= (bias - level)>>QMAT_SHIFT;
5599                 coeff[0][i]= -level;
5600                 coeff[1][i]= -level+1;
5601 //                coeff[2][k]= -level+2;
5602             }
5603             coeff_count[i]= FFMIN(level, 2);
5604             assert(coeff_count[i]);
5605             max |=level;
5606         }else{
5607             coeff[0][i]= (level>>31)|1;
5608             coeff_count[i]= 1;
5609         }
5610     }
5611
5612     *overflow= s->max_qcoeff < max; //overflow might have happened
5613
5614     if(last_non_zero < start_i){
5615         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5616         return last_non_zero;
5617     }
5618
5619     score_tab[start_i]= 0;
5620     survivor[0]= start_i;
5621     survivor_count= 1;
5622
5623     for(i=start_i; i<=last_non_zero; i++){
5624         int level_index, j;
5625         const int dct_coeff= ABS(block[ scantable[i] ]);
5626         const int zero_distortion= dct_coeff*dct_coeff;
5627         int best_score=256*256*256*120;
5628         for(level_index=0; level_index < coeff_count[i]; level_index++){
5629             int distortion;
5630             int level= coeff[level_index][i];
5631             const int alevel= ABS(level);
5632             int unquant_coeff;
5633
5634             assert(level);
5635
5636             if(s->out_format == FMT_H263){
5637                 unquant_coeff= alevel*qmul + qadd;
5638             }else{ //MPEG1
5639                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5640                 if(s->mb_intra){
5641                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5642                         unquant_coeff =   (unquant_coeff - 1) | 1;
5643                 }else{
5644                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5645                         unquant_coeff =   (unquant_coeff - 1) | 1;
5646                 }
5647                 unquant_coeff<<= 3;
5648             }
5649
5650             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
5651             level+=64;
5652             if((level&(~127)) == 0){
5653                 for(j=survivor_count-1; j>=0; j--){
5654                     int run= i - survivor[j];
5655                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5656                     score += score_tab[i-run];
5657
5658                     if(score < best_score){
5659                         best_score= score;
5660                         run_tab[i+1]= run;
5661                         level_tab[i+1]= level-64;
5662                     }
5663                 }
5664
5665                 if(s->out_format == FMT_H263){
5666                     for(j=survivor_count-1; j>=0; j--){
5667                         int run= i - survivor[j];
5668                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5669                         score += score_tab[i-run];
5670                         if(score < last_score){
5671                             last_score= score;
5672                             last_run= run;
5673                             last_level= level-64;
5674                             last_i= i+1;
5675                         }
5676                     }
5677                 }
5678             }else{
5679                 distortion += esc_length*lambda;
5680                 for(j=survivor_count-1; j>=0; j--){
5681                     int run= i - survivor[j];
5682                     int score= distortion + score_tab[i-run];
5683
5684                     if(score < best_score){
5685                         best_score= score;
5686                         run_tab[i+1]= run;
5687                         level_tab[i+1]= level-64;
5688                     }
5689                 }
5690
5691                 if(s->out_format == FMT_H263){
5692                     for(j=survivor_count-1; j>=0; j--){
5693                         int run= i - survivor[j];
5694                         int score= distortion + score_tab[i-run];
5695                         if(score < last_score){
5696                             last_score= score;
5697                             last_run= run;
5698                             last_level= level-64;
5699                             last_i= i+1;
5700                         }
5701                     }
5702                 }
5703             }
5704         }
5705
5706         score_tab[i+1]= best_score;
5707
5708         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
5709         if(last_non_zero <= 27){
5710             for(; survivor_count; survivor_count--){
5711                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
5712                     break;
5713             }
5714         }else{
5715             for(; survivor_count; survivor_count--){
5716                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
5717                     break;
5718             }
5719         }
5720
5721         survivor[ survivor_count++ ]= i+1;
5722     }
5723
5724     if(s->out_format != FMT_H263){
5725         last_score= 256*256*256*120;
5726         for(i= survivor[0]; i<=last_non_zero + 1; i++){
5727             int score= score_tab[i];
5728             if(i) score += lambda*2; //FIXME more exact?
5729
5730             if(score < last_score){
5731                 last_score= score;
5732                 last_i= i;
5733                 last_level= level_tab[i];
5734                 last_run= run_tab[i];
5735             }
5736         }
5737     }
5738
5739     s->coded_score[n] = last_score;
5740
5741     dc= ABS(block[0]);
5742     last_non_zero= last_i - 1;
5743     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5744
5745     if(last_non_zero < start_i)
5746         return last_non_zero;
5747
5748     if(last_non_zero == 0 && start_i == 0){
5749         int best_level= 0;
5750         int best_score= dc * dc;
5751
5752         for(i=0; i<coeff_count[0]; i++){
5753             int level= coeff[i][0];
5754             int alevel= ABS(level);
5755             int unquant_coeff, score, distortion;
5756
5757             if(s->out_format == FMT_H263){
5758                     unquant_coeff= (alevel*qmul + qadd)>>3;
5759             }else{ //MPEG1
5760                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
5761                     unquant_coeff =   (unquant_coeff - 1) | 1;
5762             }
5763             unquant_coeff = (unquant_coeff + 4) >> 3;
5764             unquant_coeff<<= 3 + 3;
5765
5766             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
5767             level+=64;
5768             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
5769             else                    score= distortion + esc_length*lambda;
5770
5771             if(score < best_score){
5772                 best_score= score;
5773                 best_level= level - 64;
5774             }
5775         }
5776         block[0]= best_level;
5777         s->coded_score[n] = best_score - dc*dc;
5778         if(best_level == 0) return -1;
5779         else                return last_non_zero;
5780     }
5781
5782     i= last_i;
5783     assert(last_level);
5784
5785     block[ perm_scantable[last_non_zero] ]= last_level;
5786     i -= last_run + 1;
5787
5788     for(; i>start_i; i -= run_tab[i] + 1){
5789         block[ perm_scantable[i-1] ]= level_tab[i];
5790     }
5791
5792     return last_non_zero;
5793 }
5794
5795 //#define REFINE_STATS 1
5796 static int16_t basis[64][64];
5797
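/**
 * Fills basis[][] with the 8x8 DCT basis functions, scaled by BASIS_SHIFT and
 * reordered according to the given IDCT permutation. Used by
 * dct_quantize_refine().
 */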
5798 static void build_basis(uint8_t *perm){
5799     int i, j, x, y;
5800     emms_c();
5801     for(i=0; i<8; i++){
5802         for(j=0; j<8; j++){
5803             for(y=0; y<8; y++){
5804                 for(x=0; x<8; x++){
5805                     double s= 0.25*(1<<BASIS_SHIFT);
5806                     int index= 8*i + j;
5807                     int perm_index= perm[index];
5808                     if(i==0) s*= sqrt(0.5);
5809                     if(j==0) s*= sqrt(0.5);
5810                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5811                 }
5812             }
5813         }
5814     }
5815 }
5816
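/**
 * Iterative quantization refinement used when quantizer_noise_shaping is
 * enabled: starting from an already quantized block it repeatedly tries +-1
 * changes of individual coefficients, scoring each candidate with the VLC
 * length tables (rate) plus try_8x8basis() on the residual (distortion), and
 * applies the best change until no change improves the score.
 * @return index of the last non-zero coefficient
 */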
5817 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5818                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5819                         int n, int qscale){
5820     int16_t rem[64];
5821     DCTELEM d1[64] __align16;
5822     const int *qmat;
5823     const uint8_t *scantable= s->intra_scantable.scantable;
5824     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5825 //    unsigned int threshold1, threshold2;
5826 //    int bias=0;
5827     int run_tab[65];
5828     int prev_run=0;
5829     int prev_level=0;
5830     int qmul, qadd, start_i, last_non_zero, i, dc;
5831     uint8_t * length;
5832     uint8_t * last_length;
5833     int lambda;
5834     int rle_index, run, q, sum;
5835 #ifdef REFINE_STATS
5836 static int count=0;
5837 static int after_last=0;
5838 static int to_zero=0;
5839 static int from_zero=0;
5840 static int raise=0;
5841 static int lower=0;
5842 static int messed_sign=0;
5843 #endif
5844
5845     if(basis[0][0] == 0)
5846         build_basis(s->dsp.idct_permutation);
5847
5848     qmul= qscale*2;
5849     qadd= (qscale-1)|1;
5850     if (s->mb_intra) {
5851         if (!s->h263_aic) {
5852             if (n < 4)
5853                 q = s->y_dc_scale;
5854             else
5855                 q = s->c_dc_scale;
5856         } else{
5857             /* For AIC we skip quant/dequant of INTRADC */
5858             q = 1;
5859             qadd=0;
5860         }
5861         q <<= RECON_SHIFT-3;
5862         /* note: block[0] is assumed to be positive */
5863         dc= block[0]*q;
5864 //        block[0] = (block[0] + (q >> 1)) / q;
5865         start_i = 1;
5866         qmat = s->q_intra_matrix[qscale];
5867 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5868 //            bias= 1<<(QMAT_SHIFT-1);
5869         length     = s->intra_ac_vlc_length;
5870         last_length= s->intra_ac_vlc_last_length;
5871     } else {
5872         dc= 0;
5873         start_i = 0;
5874         qmat = s->q_inter_matrix[qscale];
5875         length     = s->inter_ac_vlc_length;
5876         last_length= s->inter_ac_vlc_last_length;
5877     }
5878     last_non_zero = s->block_last_index[n];
5879
5880 #ifdef REFINE_STATS
5881 {START_TIMER
5882 #endif
5883     dc += (1<<(RECON_SHIFT-1));
5884     for(i=0; i<64; i++){
5885         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
5886     }
5887 #ifdef REFINE_STATS
5888 STOP_TIMER("memset rem[]")}
5889 #endif
5890     sum=0;
5891     for(i=0; i<64; i++){
5892         int one= 36;
5893         int qns=4;
5894         int w;
5895
5896         w= ABS(weight[i]) + qns*one;
5897         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
5898
5899         weight[i] = w;
5900 //        w=weight[i] = (63*qns + (w/2)) / w;
5901
5902         assert(w>0);
5903         assert(w<(1<<6));
5904         sum += w*w;
5905     }
5906     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
5907 #ifdef REFINE_STATS
5908 {START_TIMER
5909 #endif
5910     run=0;
5911     rle_index=0;
5912     for(i=start_i; i<=last_non_zero; i++){
5913         int j= perm_scantable[i];
5914         const int level= block[j];
5915         int coeff;
5916
5917         if(level){
5918             if(level<0) coeff= qmul*level - qadd;
5919             else        coeff= qmul*level + qadd;
5920             run_tab[rle_index++]=run;
5921             run=0;
5922
5923             s->dsp.add_8x8basis(rem, basis[j], coeff);
5924         }else{
5925             run++;
5926         }
5927     }
5928 #ifdef REFINE_STATS
5929 if(last_non_zero>0){
5930 STOP_TIMER("init rem[]")
5931 }
5932 }
5933
5934 {START_TIMER
5935 #endif
5936     for(;;){
5937         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
5938         int best_coeff=0;
5939         int best_change=0;
5940         int run2, best_unquant_change=0, analyze_gradient;
5941 #ifdef REFINE_STATS
5942 {START_TIMER
5943 #endif
5944         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
5945
5946         if(analyze_gradient){
5947 #ifdef REFINE_STATS
5948 {START_TIMER
5949 #endif
5950             for(i=0; i<64; i++){
5951                 int w= weight[i];
5952
5953                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
5954             }
5955 #ifdef REFINE_STATS
5956 STOP_TIMER("rem*w*w")}
5957 {START_TIMER
5958 #endif
5959             s->dsp.fdct(d1);
5960 #ifdef REFINE_STATS
5961 STOP_TIMER("dct")}
5962 #endif
5963         }
5964
5965         if(start_i){
5966             const int level= block[0];
5967             int change, old_coeff;
5968
5969             assert(s->mb_intra);
5970
5971             old_coeff= q*level;
5972
5973             for(change=-1; change<=1; change+=2){
5974                 int new_level= level + change;
5975                 int score, new_coeff;
5976
5977                 new_coeff= q*new_level;
5978                 if(new_coeff >= 2048 || new_coeff < 0)
5979                     continue;
5980
5981                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
5982                 if(score<best_score){
5983                     best_score= score;
5984                     best_coeff= 0;
5985                     best_change= change;
5986                     best_unquant_change= new_coeff - old_coeff;
5987                 }
5988             }
5989         }
5990
5991         run=0;
5992         rle_index=0;
5993         run2= run_tab[rle_index++];
5994         prev_level=0;
5995         prev_run=0;
5996
5997         for(i=start_i; i<64; i++){
5998             int j= perm_scantable[i];
5999             const int level= block[j];
6000             int change, old_coeff;
6001
6002             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6003                 break;
6004
6005             if(level){
6006                 if(level<0) old_coeff= qmul*level - qadd;
6007                 else        old_coeff= qmul*level + qadd;
6008                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6009             }else{
6010                 old_coeff=0;
6011                 run2--;
6012                 assert(run2>=0 || i >= last_non_zero );
6013             }
6014
6015             for(change=-1; change<=1; change+=2){
6016                 int new_level= level + change;
6017                 int score, new_coeff, unquant_change;
6018
6019                 score=0;
6020                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
6021                    continue;
6022
6023                 if(new_level){
6024                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6025                     else            new_coeff= qmul*new_level + qadd;
6026                     if(new_coeff >= 2048 || new_coeff <= -2048)
6027                         continue;
6028                     //FIXME check for overflow
6029
6030                     if(level){
6031                         if(level < 63 && level > -63){
6032                             if(i < last_non_zero)
6033                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6034                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6035                             else
6036                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6037                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6038                         }
6039                     }else{
6040                         assert(ABS(new_level)==1);
6041
6042                         if(analyze_gradient){
6043                             int g= d1[ scantable[i] ];
6044                             if(g && (g^new_level) >= 0)
6045                                 continue;
6046                         }
6047
6048                         if(i < last_non_zero){
6049                             int next_i= i + run2 + 1;
6050                             int next_level= block[ perm_scantable[next_i] ] + 64;
6051
6052                             if(next_level&(~127))
6053                                 next_level= 0;
6054
6055                             if(next_i < last_non_zero)
6056                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6057                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6058                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6059                             else
6060                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6061                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6062                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6063                         }else{
6064                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6065                             if(prev_level){
6066                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6067                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6068                             }
6069                         }
6070                     }
6071                 }else{
6072                     new_coeff=0;
6073                     assert(ABS(level)==1);
6074
6075                     if(i < last_non_zero){
6076                         int next_i= i + run2 + 1;
6077                         int next_level= block[ perm_scantable[next_i] ] + 64;
6078
6079                         if(next_level&(~127))
6080                             next_level= 0;
6081
6082                         if(next_i < last_non_zero)
6083                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6084                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6085                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6086                         else
6087                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6088                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6089                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6090                     }else{
6091                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6092                         if(prev_level){
6093                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6094                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6095                         }
6096                     }
6097                 }
6098
6099                 score *= lambda;
6100
6101                 unquant_change= new_coeff - old_coeff;
6102                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6103
6104                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6105                 if(score<best_score){
6106                     best_score= score;
6107                     best_coeff= i;
6108                     best_change= change;
6109                     best_unquant_change= unquant_change;
6110                 }
6111             }
6112             if(level){
6113                 prev_level= level + 64;
6114                 if(prev_level&(~127))
6115                     prev_level= 0;
6116                 prev_run= run;
6117                 run=0;
6118             }else{
6119                 run++;
6120             }
6121         }
6122 #ifdef REFINE_STATS
6123 STOP_TIMER("iterative step")}
6124 #endif
6125
6126         if(best_change){
6127             int j= perm_scantable[ best_coeff ];
6128
6129             block[j] += best_change;
6130
6131             if(best_coeff > last_non_zero){
6132                 last_non_zero= best_coeff;
6133                 assert(block[j]);
6134 #ifdef REFINE_STATS
6135 after_last++;
6136 #endif
6137             }else{
6138 #ifdef REFINE_STATS
6139 if(block[j]){
6140     if(block[j] - best_change){
6141         if(ABS(block[j]) > ABS(block[j] - best_change)){
6142             raise++;
6143         }else{
6144             lower++;
6145         }
6146     }else{
6147         from_zero++;
6148     }
6149 }else{
6150     to_zero++;
6151 }
6152 #endif
6153                 for(; last_non_zero>=start_i; last_non_zero--){
6154                     if(block[perm_scantable[last_non_zero]])
6155                         break;
6156                 }
6157             }
6158 #ifdef REFINE_STATS
6159 count++;
6160 if(256*256*256*64 % count == 0){
6161     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6162 }
6163 #endif
6164             run=0;
6165             rle_index=0;
6166             for(i=start_i; i<=last_non_zero; i++){
6167                 int j= perm_scantable[i];
6168                 const int level= block[j];
6169
6170                  if(level){
6171                      run_tab[rle_index++]=run;
6172                      run=0;
6173                  }else{
6174                      run++;
6175                  }
6176             }
6177
6178             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6179         }else{
6180             break;
6181         }
6182     }
6183 #ifdef REFINE_STATS
6184 if(last_non_zero>0){
6185 STOP_TIMER("iterative search")
6186 }
6187 }
6188 #endif
6189
6190     return last_non_zero;
6191 }
6192
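/**
 * Default (non-trellis) quantizer: forward DCT, optional DCT-domain denoising,
 * then biased quantization of each coefficient against the intra/inter
 * quantization matrix; nonzero coefficients are finally permuted for the IDCT.
 * @param overflow set if the largest quantized level exceeds max_qcoeff
 * @return index of the last non-zero coefficient
 */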
6193 static int dct_quantize_c(MpegEncContext *s,
6194                         DCTELEM *block, int n,
6195                         int qscale, int *overflow)
6196 {
6197     int i, j, level, last_non_zero, q, start_i;
6198     const int *qmat;
6199     const uint8_t *scantable= s->intra_scantable.scantable;
6200     int bias;
6201     int max=0;
6202     unsigned int threshold1, threshold2;
6203
6204     s->dsp.fdct (block);
6205
6206     if(s->dct_error_sum)
6207         s->denoise_dct(s, block);
6208
6209     if (s->mb_intra) {
6210         if (!s->h263_aic) {
6211             if (n < 4)
6212                 q = s->y_dc_scale;
6213             else
6214                 q = s->c_dc_scale;
6215             q = q << 3;
6216         } else
6217             /* For AIC we skip quant/dequant of INTRADC */
6218             q = 1 << 3;
6219
6220         /* note: block[0] is assumed to be positive */
6221         block[0] = (block[0] + (q >> 1)) / q;
6222         start_i = 1;
6223         last_non_zero = 0;
6224         qmat = s->q_intra_matrix[qscale];
6225         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6226     } else {
6227         start_i = 0;
6228         last_non_zero = -1;
6229         qmat = s->q_inter_matrix[qscale];
6230         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6231     }
6232     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6233     threshold2= (threshold1<<1);
6234     for(i=63;i>=start_i;i--) {
6235         j = scantable[i];
6236         level = block[j] * qmat[j];
6237
6238         if(((unsigned)(level+threshold1))>threshold2){
6239             last_non_zero = i;
6240             break;
6241         }else{
6242             block[j]=0;
6243         }
6244     }
6245     for(i=start_i; i<=last_non_zero; i++) {
6246         j = scantable[i];
6247         level = block[j] * qmat[j];
6248
6249 //        if(   bias+level >= (1<<QMAT_SHIFT)
6250 //           || bias-level >= (1<<QMAT_SHIFT)){
6251         if(((unsigned)(level+threshold1))>threshold2){
6252             if(level>0){
6253                 level= (bias + level)>>QMAT_SHIFT;
6254                 block[j]= level;
6255             }else{
6256                 level= (bias - level)>>QMAT_SHIFT;
6257                 block[j]= -level;
6258             }
6259             max |=level;
6260         }else{
6261             block[j]=0;
6262         }
6263     }
6264     *overflow= s->max_qcoeff < max; //overflow might have happened
6265
6266     /* we need this permutation so that the IDCT gets the coefficients in its expected order; we only permute the nonzero elements */
6267     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6268         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6269
6270     return last_non_zero;
6271 }
6272
6273 #endif //CONFIG_ENCODERS
6274
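/**
 * MPEG-1 intra dequantization: rescales the DC coefficient with the
 * luma/chroma DC scale and the AC coefficients with qscale and the intra
 * matrix, forcing odd reconstruction levels via (level - 1) | 1.
 */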
6275 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6276                                    DCTELEM *block, int n, int qscale)
6277 {
6278     int i, level, nCoeffs;
6279     const uint16_t *quant_matrix;
6280
6281     nCoeffs= s->block_last_index[n];
6282
6283     if (n < 4)
6284         block[0] = block[0] * s->y_dc_scale;
6285     else
6286         block[0] = block[0] * s->c_dc_scale;
6287     /* XXX: only mpeg1 */
6288     quant_matrix = s->intra_matrix;
6289     for(i=1;i<=nCoeffs;i++) {
6290         int j= s->intra_scantable.permutated[i];
6291         level = block[j];
6292         if (level) {
6293             if (level < 0) {
6294                 level = -level;
6295                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6296                 level = (level - 1) | 1;
6297                 level = -level;
6298             } else {
6299                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6300                 level = (level - 1) | 1;
6301             }
6302             block[j] = level;
6303         }
6304     }
6305 }
6306
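/**
 * MPEG-1 inter dequantization: level' = ((2*level + 1) * qscale * matrix) >> 4,
 * forced to an odd value, with the sign restored afterwards.
 */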
6307 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6308                                    DCTELEM *block, int n, int qscale)
6309 {
6310     int i, level, nCoeffs;
6311     const uint16_t *quant_matrix;
6312
6313     nCoeffs= s->block_last_index[n];
6314
6315     quant_matrix = s->inter_matrix;
6316     for(i=0; i<=nCoeffs; i++) {
6317         int j= s->intra_scantable.permutated[i];
6318         level = block[j];
6319         if (level) {
6320             if (level < 0) {
6321                 level = -level;
6322                 level = (((level << 1) + 1) * qscale *
6323                          ((int) (quant_matrix[j]))) >> 4;
6324                 level = (level - 1) | 1;
6325                 level = -level;
6326             } else {
6327                 level = (((level << 1) + 1) * qscale *
6328                          ((int) (quant_matrix[j]))) >> 4;
6329                 level = (level - 1) | 1;
6330             }
6331             block[j] = level;
6332         }
6333     }
6334 }
6335
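/**
 * MPEG-2 intra dequantization: like the MPEG-1 variant but without the
 * odd-value forcing; with alternate_scan all 64 coefficients are processed.
 */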
6336 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6337                                    DCTELEM *block, int n, int qscale)
6338 {
6339     int i, level, nCoeffs;
6340     const uint16_t *quant_matrix;
6341
6342     if(s->alternate_scan) nCoeffs= 63;
6343     else nCoeffs= s->block_last_index[n];
6344
6345     if (n < 4)
6346         block[0] = block[0] * s->y_dc_scale;
6347     else
6348         block[0] = block[0] * s->c_dc_scale;
6349     quant_matrix = s->intra_matrix;
6350     for(i=1;i<=nCoeffs;i++) {
6351         int j= s->intra_scantable.permutated[i];
6352         level = block[j];
6353         if (level) {
6354             if (level < 0) {
6355                 level = -level;
6356                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6357                 level = -level;
6358             } else {
6359                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6360             }
6361             block[j] = level;
6362         }
6363     }
6364 }
6365
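/**
 * MPEG-2 inter dequantization. The final XOR of block[63] with the parity of
 * the coefficient sum implements the MPEG-2 mismatch control.
 */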
6366 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6367                                    DCTELEM *block, int n, int qscale)
6368 {
6369     int i, level, nCoeffs;
6370     const uint16_t *quant_matrix;
6371     int sum=-1;
6372
6373     if(s->alternate_scan) nCoeffs= 63;
6374     else nCoeffs= s->block_last_index[n];
6375
6376     quant_matrix = s->inter_matrix;
6377     for(i=0; i<=nCoeffs; i++) {
6378         int j= s->intra_scantable.permutated[i];
6379         level = block[j];
6380         if (level) {
6381             if (level < 0) {
6382                 level = -level;
6383                 level = (((level << 1) + 1) * qscale *
6384                          ((int) (quant_matrix[j]))) >> 4;
6385                 level = -level;
6386             } else {
6387                 level = (((level << 1) + 1) * qscale *
6388                          ((int) (quant_matrix[j]))) >> 4;
6389             }
6390             block[j] = level;
6391             sum+=level;
6392         }
6393     }
6394     block[63]^=sum&1;
6395 }
6396
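/**
 * H.263/MPEG-4 intra dequantization: level' = 2*qscale*level +/- qadd, with
 * the DC coefficient scaled by the DC scale tables unless AIC is used. With
 * AC prediction all 64 coefficients are processed.
 */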
6397 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6398                                   DCTELEM *block, int n, int qscale)
6399 {
6400     int i, level, qmul, qadd;
6401     int nCoeffs;
6402
6403     assert(s->block_last_index[n]>=0);
6404
6405     qmul = qscale << 1;
6406
6407     if (!s->h263_aic) {
6408         if (n < 4)
6409             block[0] = block[0] * s->y_dc_scale;
6410         else
6411             block[0] = block[0] * s->c_dc_scale;
6412         qadd = (qscale - 1) | 1;
6413     }else{
6414         qadd = 0;
6415     }
6416     if(s->ac_pred)
6417         nCoeffs=63;
6418     else
6419         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6420
6421     for(i=1; i<=nCoeffs; i++) {
6422         level = block[i];
6423         if (level) {
6424             if (level < 0) {
6425                 level = level * qmul - qadd;
6426             } else {
6427                 level = level * qmul + qadd;
6428             }
6429             block[i] = level;
6430         }
6431     }
6432 }
6433
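/**
 * H.263/MPEG-4 inter dequantization: level' = 2*qscale*level +/- qadd for all
 * coefficients up to the last non-zero one.
 */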
6434 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6435                                   DCTELEM *block, int n, int qscale)
6436 {
6437     int i, level, qmul, qadd;
6438     int nCoeffs;
6439
6440     assert(s->block_last_index[n]>=0);
6441
6442     qadd = (qscale - 1) | 1;
6443     qmul = qscale << 1;
6444
6445     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6446
6447     for(i=0; i<=nCoeffs; i++) {
6448         level = block[i];
6449         if (level) {
6450             if (level < 0) {
6451                 level = level * qmul - qadd;
6452             } else {
6453                 level = level * qmul + qadd;
6454             }
6455             block[i] = level;
6456         }
6457     }
6458 }
6459
6460 #ifdef CONFIG_ENCODERS
6461 AVCodec h263_encoder = {
6462     "h263",
6463     CODEC_TYPE_VIDEO,
6464     CODEC_ID_H263,
6465     sizeof(MpegEncContext),
6466     MPV_encode_init,
6467     MPV_encode_picture,
6468     MPV_encode_end,
6469     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6470 };
6471
6472 AVCodec h263p_encoder = {
6473     "h263p",
6474     CODEC_TYPE_VIDEO,
6475     CODEC_ID_H263P,
6476     sizeof(MpegEncContext),
6477     MPV_encode_init,
6478     MPV_encode_picture,
6479     MPV_encode_end,
6480     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6481 };
6482
6483 AVCodec flv_encoder = {
6484     "flv",
6485     CODEC_TYPE_VIDEO,
6486     CODEC_ID_FLV1,
6487     sizeof(MpegEncContext),
6488     MPV_encode_init,
6489     MPV_encode_picture,
6490     MPV_encode_end,
6491     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6492 };
6493
6494 AVCodec rv10_encoder = {
6495     "rv10",
6496     CODEC_TYPE_VIDEO,
6497     CODEC_ID_RV10,
6498     sizeof(MpegEncContext),
6499     MPV_encode_init,
6500     MPV_encode_picture,
6501     MPV_encode_end,
6502     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6503 };
6504
6505 AVCodec rv20_encoder = {
6506     "rv20",
6507     CODEC_TYPE_VIDEO,
6508     CODEC_ID_RV20,
6509     sizeof(MpegEncContext),
6510     MPV_encode_init,
6511     MPV_encode_picture,
6512     MPV_encode_end,
6513     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6514 };
6515
6516 AVCodec mpeg4_encoder = {
6517     "mpeg4",
6518     CODEC_TYPE_VIDEO,
6519     CODEC_ID_MPEG4,
6520     sizeof(MpegEncContext),
6521     MPV_encode_init,
6522     MPV_encode_picture,
6523     MPV_encode_end,
6524     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6525     .capabilities= CODEC_CAP_DELAY,
6526 };
6527
6528 AVCodec msmpeg4v1_encoder = {
6529     "msmpeg4v1",
6530     CODEC_TYPE_VIDEO,
6531     CODEC_ID_MSMPEG4V1,
6532     sizeof(MpegEncContext),
6533     MPV_encode_init,
6534     MPV_encode_picture,
6535     MPV_encode_end,
6536     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6537 };
6538
6539 AVCodec msmpeg4v2_encoder = {
6540     "msmpeg4v2",
6541     CODEC_TYPE_VIDEO,
6542     CODEC_ID_MSMPEG4V2,
6543     sizeof(MpegEncContext),
6544     MPV_encode_init,
6545     MPV_encode_picture,
6546     MPV_encode_end,
6547     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6548 };
6549
6550 AVCodec msmpeg4v3_encoder = {
6551     "msmpeg4",
6552     CODEC_TYPE_VIDEO,
6553     CODEC_ID_MSMPEG4V3,
6554     sizeof(MpegEncContext),
6555     MPV_encode_init,
6556     MPV_encode_picture,
6557     MPV_encode_end,
6558     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6559 };
6560
6561 AVCodec wmv1_encoder = {
6562     "wmv1",
6563     CODEC_TYPE_VIDEO,
6564     CODEC_ID_WMV1,
6565     sizeof(MpegEncContext),
6566     MPV_encode_init,
6567     MPV_encode_picture,
6568     MPV_encode_end,
6569     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6570 };
6571
6572 AVCodec mjpeg_encoder = {
6573     "mjpeg",
6574     CODEC_TYPE_VIDEO,
6575     CODEC_ID_MJPEG,
6576     sizeof(MpegEncContext),
6577     MPV_encode_init,
6578     MPV_encode_picture,
6579     MPV_encode_end,
6580     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
6581 };
6582
6583 #endif //CONFIG_ENCODERS