/*
 * Source: git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
 * Commit: complain about mpeg4 limits only if codec_id == MPEG4
 */
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22  
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */ 
27  
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
55                                   DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57 #ifdef CONFIG_ENCODERS
58 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63 #endif //CONFIG_ENCODERS
64
65 #ifdef HAVE_XVMC
66 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67 extern void XVMC_field_end(MpegEncContext *s);
68 extern void XVMC_decode_mb(MpegEncContext *s);
69 #endif
70
71 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72
73
74 /* enable all paranoid tests for rounding, overflows, etc... */
75 //#define PARANOID
76
77 //#define DEBUG
78
79
80 /* for jpeg fast DCT */
81 #define CONST_BITS 14
82
83 static const uint16_t aanscales[64] = {
84     /* precomputed values scaled up by 14 bits */
85     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
86     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
87     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
88     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
89     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
90     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
91     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
92     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
93 };
94
95 static const uint8_t h263_chroma_roundtab[16] = {
96 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
97     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
98 };
99
100 static const uint8_t ff_default_chroma_qscale_table[32]={
101 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
102     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
103 };
104
105 #ifdef CONFIG_ENCODERS
106 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
107 static uint8_t default_fcode_tab[MAX_MV*2+1];
108
109 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
110
/**
 * Builds the per-qscale quantization multiplier tables used by the encoder.
 * For each qscale in [qmin,qmax] the quant matrix is inverted (division turned
 * into a multiply+shift) so dct_quantize can avoid divisions. The table layout
 * depends on which fdct implementation is active, since fdct_ifast (and
 * non-postscaled FAAN) leave an extra AAN scale factor in the coefficients
 * that must be folded into the multiplier here.
 *
 * @param dsp          DSPContext; fdct pointer selects the table variant,
 *                     idct_permutation reorders matrix entries
 * @param qmat         output: 32-bit multipliers per qscale
 * @param qmat16       output: 16-bit multiplier/bias pairs (MMX path); only
 *                     filled in the generic (non-islow/ifast) branch
 * @param quant_matrix source quant matrix in natural order
 * @param bias         rounding bias folded into qmat16[..][1]
 * @param qmin, qmax   inclusive qscale range to fill
 * @param intra        1 to skip the DC coefficient in the overflow scan
 */
static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                           const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
{
    int qscale;
    int shift=0;

    for(qscale=qmin; qscale<=qmax; qscale++){
        int i;
        if (dsp->fdct == ff_jpeg_fdct_islow 
#ifdef FAAN_POSTSCALE
            || dsp->fdct == ff_faandct
#endif
            ) {
            /* DCT output is unscaled: plain reciprocal of the matrix entry */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
                
                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
                                (qscale * quant_matrix[j]));
            }
        } else if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
            /* AAN fast DCT: fold the 14-bit aanscales postscale factor into
               the reciprocal so quantization undoes it for free */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
                
                qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
                                (aanscales[i] * qscale * quant_matrix[j]));
            }
        } else {
            /* generic fdct: also build the 16-bit tables for the MMX quantizer */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                   So 16           <= qscale * quant_matrix[i]             <= 7905
                   so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
                   so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
                */
                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
//                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);

                /* clamp so the multiplier fits in 16 bits and is never 0 */
                if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
                qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
            }
        }
        
        /* find a shift that keeps max_coeff * qmat within int range */
        for(i=intra; i<64; i++){
            int64_t max= 8191;
            if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
                max= (8191LL*aanscales[i]) >> 14;
            }
            while(((max * qmat[qscale][i]) >> shift) > INT_MAX){ 
                shift++;
            }
        }
    }
    if(shift){
        av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
    }
}
184
185 static inline void update_qscale(MpegEncContext *s){
186     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
187     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
188     
189     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
190 }
191 #endif //CONFIG_ENCODERS
192
193 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
194     int i;
195     int end;
196     
197     st->scantable= src_scantable;
198
199     for(i=0; i<64; i++){
200         int j;
201         j = src_scantable[i];
202         st->permutated[i] = permutation[j];
203 #ifdef ARCH_POWERPC
204         st->inverse[j] = i;
205 #endif
206     }
207     
208     end=-1;
209     for(i=0; i<64; i++){
210         int j;
211         j = st->permutated[i];
212         if(j>end) end=j;
213         st->raster_end[i]= end;
214     }
215 }
216
217 #ifdef CONFIG_ENCODERS
218 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
219     int i;
220
221     if(matrix){
222         put_bits(pb, 1, 1);
223         for(i=0;i<64;i++) {
224             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
225         }
226     }else
227         put_bits(pb, 1, 0);
228 }
229 #endif //CONFIG_ENCODERS
230
231 /* init common dct for both encoder and decoder */
/**
 * Initializes the DCT/quantization function pointers and scantables shared by
 * encoder and decoder. Installs the C reference implementations first, then
 * lets each platform-specific init override them with optimized versions.
 *
 * @return 0 (always succeeds)
 */
int DCT_common_init(MpegEncContext *s)
{
    /* C reference unquantizers; may be replaced by the arch inits below */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS
        
    /* platform-specific overrides (MMX, Alpha, mlib, MMI, ARMv4L, PPC) */
#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* keep the fast (possibly arch-optimized) quantizer around even when the
       slower trellis quantizer is selected below */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones 
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
289
290 static void copy_picture(Picture *dst, Picture *src){
291     *dst = *src;
292     dst->type= FF_BUFFER_TYPE_COPY;
293 }
294
/**
 * Copies the frame-level attributes (and, when two-pass motion estimation is
 * enabled via me_threshold, the motion-related side data) from src to dst.
 * Pixel data is NOT copied.
 */
static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
    int i;

    dst->pict_type              = src->pict_type;
    dst->quality                = src->quality;
    dst->coded_picture_number   = src->coded_picture_number;
    dst->display_picture_number = src->display_picture_number;
//    dst->reference              = src->reference;
    dst->pts                    = src->pts;
    dst->interlaced_frame       = src->interlaced_frame;
    dst->top_field_first        = src->top_field_first;

    if(s->avctx->me_threshold){
        /* the application must have supplied motion side data for ME reuse;
           warn about each missing piece instead of crashing later */
        if(!src->motion_val[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
        if(!src->mb_type)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
        if(!src->ref_index[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
        if(src->motion_subsample_log2 != dst->motion_subsample_log2)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
            src->motion_subsample_log2, dst->motion_subsample_log2);

        memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
        
        /* 0 = forward, 1 = backward motion data */
        for(i=0; i<2; i++){
            int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
            int height= ((16*s->mb_height)>>src->motion_subsample_log2);

            /* skip self-copies: src may alias dst's buffers */
            if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
                memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
            }
            if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
                memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
            }
        }
    }
}
333
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 * Also allocates the per-picture side-data tables (qscale, mb_type, motion
 * vectors, ...) on first use, and updates the picture-type history used for
 * the mpeg1/2 skipped-MB age heuristic.
 *
 * @param shared if non-zero, pic->data[] is already set by the caller and no
 *               buffer is requested from the application
 * @return 0 on success, -1 on failure (allocation or buffer sanity check)
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;
    
    if(shared){
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;
        
        assert(!pic->data[0]);
        
        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
        
        /* sanity-check what the application's get_buffer returned */
        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* strides must stay constant for the lifetime of the context */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride missmatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }
    
    /* first use of this Picture slot: allocate the side-data tables */
    if(pic->qscale_table==NULL){
        if (s->encoding) {        
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* H.264 stores motion at 4x4 granularity (subsample_log2 == 2) */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* other codecs use 8x8 granularity (subsample_log2 == 3) */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
    
    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
418
419 /**
420  * deallocates a picture
421  */
422 static void free_picture(MpegEncContext *s, Picture *pic){
423     int i;
424
425     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
426         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
427     }
428
429     av_freep(&pic->mb_var);
430     av_freep(&pic->mc_mb_var);
431     av_freep(&pic->mb_mean);
432     av_freep(&pic->mbskip_table);
433     av_freep(&pic->qscale_table);
434     av_freep(&pic->mb_type_base);
435     av_freep(&pic->dct_coeff);
436     av_freep(&pic->pan_scan);
437     pic->mb_type= NULL;
438     for(i=0; i<2; i++){
439         av_freep(&pic->motion_val_base[i]);
440         av_freep(&pic->ref_index[i]);
441     }
442     
443     if(pic->type == FF_BUFFER_TYPE_SHARED){
444         for(i=0; i<4; i++){
445             pic->base[i]=
446             pic->data[i]= NULL;
447         }
448         pic->type= 0;        
449     }
450 }
451
/**
 * Allocates the per-thread scratch buffers of an MpegEncContext (edge
 * emulation buffer, ME scratchpads and maps, DCT blocks). Called once per
 * thread context; shared tables live in the base context instead.
 *
 * @param base currently unused (scratch buffers are per-thread)
 * @return 0 on success, -1 on allocation failure (cleanup is deferred to
 *         MPV_common_end())
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264) 
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t)) 
    /* the rd/b/obmc scratchpads alias the ME scratchpad (never live at once) */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }   
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    /* pblocks[] are per-block aliases into block[]; re-done after memcpy in
       ff_update_duplicate_context() */
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
481
482 static void free_duplicate_context(MpegEncContext *s){
483     if(s==NULL) return;
484
485     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
486     av_freep(&s->me.scratchpad);
487     s->rd_scratchpad=   
488     s->b_scratchpad=    
489     s->obmc_scratchpad= NULL;
490     
491     av_freep(&s->dct_error_sum);
492     av_freep(&s->me.map);
493     av_freep(&s->me.score_map);
494     av_freep(&s->blocks);
495     s->block= NULL;
496 }
497
/**
 * Saves the thread-local fields of src into bak. Used around the wholesale
 * memcpy in ff_update_duplicate_context() so that per-thread buffers and
 * state survive the copy. The field list here must match what each thread
 * owns privately.
 */
static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
#define COPY(a) bak->a= src->a
    COPY(allocated_edge_emu_buffer);
    COPY(edge_emu_buffer);
    COPY(me.scratchpad);
    COPY(rd_scratchpad);
    COPY(b_scratchpad);
    COPY(obmc_scratchpad);
    COPY(me.map);
    COPY(me.score_map);
    COPY(blocks);
    COPY(block);
    COPY(start_mb_y);
    COPY(end_mb_y);
    COPY(me.map_generation);
    COPY(pb);
    COPY(dct_error_sum);
    COPY(dct_count[0]);
    COPY(dct_count[1]);
#undef COPY
}
519
/**
 * Refreshes a thread context from the master context: copies the whole
 * struct, then restores the thread-local fields (via backup_duplicate_context)
 * and re-derives the pblocks[] aliases which would otherwise point into the
 * master's block array.
 */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);   /* save dst's private fields */
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);   /* ...and put them back */
    for(i=0;i<12;i++){
        dst->pblocks[i] = (short *)(&dst->block[i]);
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
533
/**
 * Propagates the fields decided during motion estimation / header encoding
 * from the master context to a slice-thread context, so all threads encode
 * with consistent picture-level parameters.
 */
static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
#define COPY(a) dst->a= src->a
    COPY(pict_type);
    COPY(current_picture);
    COPY(f_code);
    COPY(b_code);
    COPY(qscale);
    COPY(lambda);
    COPY(lambda2);
    COPY(picture_in_gop_number);
    COPY(gop_picture_number);
    COPY(frame_pred_frame_dct); //FIXME dont set in encode_header
    COPY(progressive_frame); //FIXME dont set in encode_header
    COPY(partitioned_frame); //FIXME dont set in encode_header
#undef COPY
}
550
551 /**
552  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
553  * the changed fields will not depend upon the prior state of the MpegEncContext.
554  */
555 static void MPV_common_defaults(MpegEncContext *s){
556     s->y_dc_scale_table=
557     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
558     s->chroma_qscale_table= ff_default_chroma_qscale_table;
559     s->progressive_frame= 1;
560     s->progressive_sequence= 1;
561     s->picture_structure= PICT_FRAME;
562
563     s->coded_picture_number = 0;
564     s->picture_number = 0;
565     s->input_picture_number = 0;
566
567     s->picture_in_gop_number = 0;
568
569     s->f_code = 1;
570     s->b_code = 1;
571 }
572
573 /**
574  * sets the given MpegEncContext to defaults for decoding.
575  * the changed fields will not depend upon the prior state of the MpegEncContext.
576  */
/* Decoder-side defaults: currently identical to the common defaults,
 * kept as a separate entry point for symmetry with MPV_encode_defaults(). */
void MPV_decode_defaults(MpegEncContext *s){
    MPV_common_defaults(s);
}
580
581 /**
582  * sets the given MpegEncContext to defaults for encoding.
583  * the changed fields will not depend upon the prior state of the MpegEncContext.
584  */
585
586 #ifdef CONFIG_ENCODERS
/**
 * Encoder-side defaults: applies the common defaults, then lazily builds the
 * global MV penalty / fcode tables shared by all encoder instances.
 *
 * NOTE(review): the `done` guard makes this one-time init, but it is not
 * protected by any lock visible here — presumably callers serialize encoder
 * creation; confirm before relying on it from multiple threads.
 */
static void MPV_encode_defaults(MpegEncContext *s){
    static int done=0;
    
    MPV_common_defaults(s);
    
    if(!done){
        int i;
        done=1;

        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
        memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));

        /* fcode 1 covers MVs in [-16,15]; everything else stays 0 */
        for(i=-16; i<16; i++){
            default_fcode_tab[i + MAX_MV]= 1;
        }
    }
    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
}
606 #endif //CONFIG_ENCODERS
607
608 /** 
609  * init common structure for both encoder and decoder.
610  * this assumes that some variables like width/height are already set
611  */
/**
 * init common structure for both encoder and decoder.
 * this assumes that some variables like width/height are already set.
 * Derives all macroblock geometry from width/height, allocates the shared
 * tables (and, when encoding, the MV/statistics tables), then creates and
 * initializes one duplicate context per slice thread.
 *
 * @return 0 on success, -1 on error (everything allocated so far is freed
 *         via MPV_common_end())
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;

    /* each thread needs at least one 16-pixel MB row to work on */
    if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    DCT_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* macroblock geometry; strides carry one extra column for edge handling */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_height = (s->height + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;
    
    /* row strides for the 8x8-block (0-3) and MB-granular (4-5) planes */
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;
 
    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;
    
    /* convert fourcc to upper case */
    s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)          
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16) 
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    /* map linear MB index -> position in the (strided) MB arrays */
    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
    
    if (s->encoding) {
        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        /* working pointers skip the first row+column of padding */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
        
        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
        
        /* quant matrices: one table per possible qscale (32) */
        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
    
    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;
        
        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;
        
        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }
    
    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        /* 1024 is the neutral DC predictor value */
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);
    
    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
    
    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread 0 is the master context; others start as clones of it */
    s->thread_context[0]= s;
    for(i=1; i<s->avctx->thread_count; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    for(i=0; i<s->avctx->thread_count; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        /* split MB rows roughly evenly across threads, rounding to nearest */
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
799
/* free everything allocated by MPV_common_init(); also used as the failure
   path of MPV_common_init(), so it must cope with a partially set up context */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* thread_context[0] is s itself; only the duplicates (i>=1) own memory */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    av_freep(&s->mb_type);
    /* motion vector tables: the _base pointers own the memory, the plain
       pointers are offsets into them and are just cleared */
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* field-based MV tables, indexed [direction][field of cur][field of ref] */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }
    
    /* AC/DC prediction and coded-block state (H.263/MPEG-4 family) */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);
    
    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* release all picture buffers before freeing the array itself */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
883
884 #ifdef CONFIG_ENCODERS
885
886 /* init video encoder */
887 int MPV_encode_init(AVCodecContext *avctx)
888 {
889     MpegEncContext *s = avctx->priv_data;
890     int i, dummy;
891     int chroma_h_shift, chroma_v_shift;
892     
893     MPV_encode_defaults(s);
894
895     if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
896         av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
897         return -1;
898     }
899
900     if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
901         if(avctx->strict_std_compliance>=0 && avctx->pix_fmt != PIX_FMT_YUVJ420P){
902             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
903             return -1;
904         }
905     }else{
906         if(avctx->strict_std_compliance>=0 && avctx->pix_fmt != PIX_FMT_YUV420P){
907             av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
908             return -1;
909         }
910     }
911
912     s->bit_rate = avctx->bit_rate;
913     s->width = avctx->width;
914     s->height = avctx->height;
915     if(avctx->gop_size > 600){
916         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
917         avctx->gop_size=600;
918     }
919     s->gop_size = avctx->gop_size;
920     s->avctx = avctx;
921     s->flags= avctx->flags;
922     s->flags2= avctx->flags2;
923     s->max_b_frames= avctx->max_b_frames;
924     s->codec_id= avctx->codec->id;
925     s->luma_elim_threshold  = avctx->luma_elim_threshold;
926     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
927     s->strict_std_compliance= avctx->strict_std_compliance;
928     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
929     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
930     s->mpeg_quant= avctx->mpeg_quant;
931     s->rtp_mode= !!avctx->rtp_payload_size;
932     s->intra_dc_precision= avctx->intra_dc_precision;
933     s->user_specified_pts = AV_NOPTS_VALUE;
934
935     if (s->gop_size <= 1) {
936         s->intra_only = 1;
937         s->gop_size = 12;
938     } else {
939         s->intra_only = 0;
940     }
941
942     s->me_method = avctx->me_method;
943
944     /* Fixed QSCALE */
945     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
946     
947     s->adaptive_quant= (   s->avctx->lumi_masking
948                         || s->avctx->dark_masking
949                         || s->avctx->temporal_cplx_masking 
950                         || s->avctx->spatial_cplx_masking
951                         || s->avctx->p_masking
952                         || s->avctx->border_masking
953                         || (s->flags&CODEC_FLAG_QP_RD))
954                        && !s->fixed_qscale;
955     
956     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
957     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
958     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
959
960     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
961         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
962         return -1;
963     }    
964
965     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
966         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
967     }
968     
969     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
970         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
971         return -1;
972     }
973     
974     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
975         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
976         return -1;
977     }
978         
979     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate 
980        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
981        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
982         
983         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
984     }
985        
986     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
987        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
988         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
989         return -1;
990     }
991         
992     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
993         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
994         return -1;
995     }
996     
997     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
998         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
999         return -1;
1000     }
1001     
1002     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1003         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1004         return -1;
1005     }
1006
1007     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1008         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1009         return -1;
1010     }
1011     
1012     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1013         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1014         return -1;
1015     }
1016
1017     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN)) 
1018        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1019         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1020         return -1;
1021     }
1022         
1023     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1024         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1025         return -1;
1026     }
1027         
1028     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1029         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1030         return -1;
1031     }
1032
1033     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1034         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1035         return -1;
1036     }
1037     
1038     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1039         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1040         return -1;
1041     }
1042     
1043     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4 
1044        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO 
1045        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1046         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1047         return -1;
1048     }
1049     
1050     if(s->avctx->thread_count > 1)
1051         s->rtp_mode= 1;
1052
1053     if(!avctx->time_base.den || !avctx->time_base.num){
1054         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1055         return -1;
1056     }
1057     
1058     i= (INT_MAX/2+128)>>8;
1059     if(avctx->me_threshold >= i){
1060         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1061         return -1;
1062     }
1063     if(avctx->mb_threshold >= i){
1064         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1065         return -1;
1066     }
1067         
1068     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1069     if(i > 1){
1070         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1071         avctx->time_base.den /= i;
1072         avctx->time_base.num /= i;
1073 //        return -1;
1074     }
1075     
1076     if(s->codec_id==CODEC_ID_MJPEG){
1077         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1078         s->inter_quant_bias= 0;
1079     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1080         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1081         s->inter_quant_bias= 0;
1082     }else{
1083         s->intra_quant_bias=0;
1084         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1085     }
1086     
1087     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1088         s->intra_quant_bias= avctx->intra_quant_bias;
1089     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1090         s->inter_quant_bias= avctx->inter_quant_bias;
1091         
1092     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1093
1094     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1095         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1096         return -1;        
1097     }
1098     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1099
1100     switch(avctx->codec->id) {
1101     case CODEC_ID_MPEG1VIDEO:
1102         s->out_format = FMT_MPEG1;
1103         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1104         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1105         break;
1106     case CODEC_ID_MPEG2VIDEO:
1107         s->out_format = FMT_MPEG1;
1108         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1109         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1110         s->rtp_mode= 1;
1111         break;
1112     case CODEC_ID_LJPEG:
1113     case CODEC_ID_MJPEG:
1114         s->out_format = FMT_MJPEG;
1115         s->intra_only = 1; /* force intra only for jpeg */
1116         s->mjpeg_write_tables = 1; /* write all tables */
1117         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1118         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1119         s->mjpeg_vsample[1] = 1;
1120         s->mjpeg_vsample[2] = 1; 
1121         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1122         s->mjpeg_hsample[1] = 1; 
1123         s->mjpeg_hsample[2] = 1; 
1124         if (mjpeg_init(s) < 0)
1125             return -1;
1126         avctx->delay=0;
1127         s->low_delay=1;
1128         break;
1129     case CODEC_ID_H261:
1130         s->out_format = FMT_H261;
1131         avctx->delay=0;
1132         s->low_delay=1;
1133         break;
1134     case CODEC_ID_H263:
1135         if (h263_get_picture_format(s->width, s->height) == 7) {
1136             av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
1137             return -1;
1138         }
1139         s->out_format = FMT_H263;
1140         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1141         avctx->delay=0;
1142         s->low_delay=1;
1143         break;
1144     case CODEC_ID_H263P:
1145         s->out_format = FMT_H263;
1146         s->h263_plus = 1;
1147         /* Fx */
1148         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1149         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1150         s->modified_quant= s->h263_aic;
1151         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1152         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1153         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1154         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1155         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1156
1157         /* /Fx */
1158         /* These are just to be sure */
1159         avctx->delay=0;
1160         s->low_delay=1;
1161         break;
1162     case CODEC_ID_FLV1:
1163         s->out_format = FMT_H263;
1164         s->h263_flv = 2; /* format = 1; 11-bit codes */
1165         s->unrestricted_mv = 1;
1166         s->rtp_mode=0; /* don't allow GOB */
1167         avctx->delay=0;
1168         s->low_delay=1;
1169         break;
1170     case CODEC_ID_RV10:
1171         s->out_format = FMT_H263;
1172         avctx->delay=0;
1173         s->low_delay=1;
1174         break;
1175     case CODEC_ID_RV20:
1176         s->out_format = FMT_H263;
1177         avctx->delay=0;
1178         s->low_delay=1;
1179         s->modified_quant=1;
1180         s->h263_aic=1;
1181         s->h263_plus=1;
1182         s->loop_filter=1;
1183         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1184         break;
1185     case CODEC_ID_MPEG4:
1186         s->out_format = FMT_H263;
1187         s->h263_pred = 1;
1188         s->unrestricted_mv = 1;
1189         s->low_delay= s->max_b_frames ? 0 : 1;
1190         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1191         break;
1192     case CODEC_ID_MSMPEG4V1:
1193         s->out_format = FMT_H263;
1194         s->h263_msmpeg4 = 1;
1195         s->h263_pred = 1;
1196         s->unrestricted_mv = 1;
1197         s->msmpeg4_version= 1;
1198         avctx->delay=0;
1199         s->low_delay=1;
1200         break;
1201     case CODEC_ID_MSMPEG4V2:
1202         s->out_format = FMT_H263;
1203         s->h263_msmpeg4 = 1;
1204         s->h263_pred = 1;
1205         s->unrestricted_mv = 1;
1206         s->msmpeg4_version= 2;
1207         avctx->delay=0;
1208         s->low_delay=1;
1209         break;
1210     case CODEC_ID_MSMPEG4V3:
1211         s->out_format = FMT_H263;
1212         s->h263_msmpeg4 = 1;
1213         s->h263_pred = 1;
1214         s->unrestricted_mv = 1;
1215         s->msmpeg4_version= 3;
1216         s->flipflop_rounding=1;
1217         avctx->delay=0;
1218         s->low_delay=1;
1219         break;
1220     case CODEC_ID_WMV1:
1221         s->out_format = FMT_H263;
1222         s->h263_msmpeg4 = 1;
1223         s->h263_pred = 1;
1224         s->unrestricted_mv = 1;
1225         s->msmpeg4_version= 4;
1226         s->flipflop_rounding=1;
1227         avctx->delay=0;
1228         s->low_delay=1;
1229         break;
1230     case CODEC_ID_WMV2:
1231         s->out_format = FMT_H263;
1232         s->h263_msmpeg4 = 1;
1233         s->h263_pred = 1;
1234         s->unrestricted_mv = 1;
1235         s->msmpeg4_version= 5;
1236         s->flipflop_rounding=1;
1237         avctx->delay=0;
1238         s->low_delay=1;
1239         break;
1240     default:
1241         return -1;
1242     }
1243     
1244     avctx->has_b_frames= !s->low_delay;
1245
1246     s->encoding = 1;
1247
1248     /* init */
1249     if (MPV_common_init(s) < 0)
1250         return -1;
1251
1252     if(s->modified_quant)
1253         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1254     s->progressive_frame= 
1255     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1256     s->quant_precision=5;
1257     
1258     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1259     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1260     
1261 #ifdef CONFIG_H261_ENCODER
1262     if (s->out_format == FMT_H261)
1263         ff_h261_encode_init(s);
1264 #endif
1265     if (s->out_format == FMT_H263)
1266         h263_encode_init(s);
1267     if(s->msmpeg4_version)
1268         ff_msmpeg4_encode_init(s);
1269     if (s->out_format == FMT_MPEG1)
1270         ff_mpeg1_encode_init(s);
1271
1272     /* init q matrix */
1273     for(i=0;i<64;i++) {
1274         int j= s->dsp.idct_permutation[i];
1275         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1276             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1277             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1278         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1279             s->intra_matrix[j] =
1280             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1281         }else
1282         { /* mpeg1/2 */
1283             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1284             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1285         }
1286         if(s->avctx->intra_matrix)
1287             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1288         if(s->avctx->inter_matrix)
1289             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1290     }
1291
1292     /* precompute matrix */
1293     /* for mjpeg, we do include qscale in the matrix */
1294     if (s->out_format != FMT_MJPEG) {
1295         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
1296                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1297         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16, 
1298                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1299     }
1300
1301     if(ff_rate_control_init(s) < 0)
1302         return -1;
1303     
1304     return 0;
1305 }
1306
1307 int MPV_encode_end(AVCodecContext *avctx)
1308 {
1309     MpegEncContext *s = avctx->priv_data;
1310
1311 #ifdef STATS
1312     print_stats();
1313 #endif
1314
1315     ff_rate_control_uninit(s);
1316
1317     MPV_common_end(s);
1318     if (s->out_format == FMT_MJPEG)
1319         mjpeg_close(s);
1320
1321     av_freep(&avctx->extradata);
1322       
1323     return 0;
1324 }
1325
1326 #endif //CONFIG_ENCODERS
1327
1328 void init_rl(RLTable *rl, int use_static)
1329 {
1330     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1331     uint8_t index_run[MAX_RUN+1];
1332     int last, run, level, start, end, i;
1333
1334     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1335     if(use_static && rl->max_level[0])
1336         return;
1337
1338     /* compute max_level[], max_run[] and index_run[] */
1339     for(last=0;last<2;last++) {
1340         if (last == 0) {
1341             start = 0;
1342             end = rl->last;
1343         } else {
1344             start = rl->last;
1345             end = rl->n;
1346         }
1347
1348         memset(max_level, 0, MAX_RUN + 1);
1349         memset(max_run, 0, MAX_LEVEL + 1);
1350         memset(index_run, rl->n, MAX_RUN + 1);
1351         for(i=start;i<end;i++) {
1352             run = rl->table_run[i];
1353             level = rl->table_level[i];
1354             if (index_run[run] == rl->n)
1355                 index_run[run] = i;
1356             if (level > max_level[run])
1357                 max_level[run] = level;
1358             if (run > max_run[level])
1359                 max_run[level] = run;
1360         }
1361         if(use_static)
1362             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1363         else
1364             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1365         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1366         if(use_static)
1367             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1368         else
1369             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1370         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1371         if(use_static)
1372             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1373         else
1374             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1375         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1376     }
1377 }
1378
/* Replicate the border pixels of a width x height image outward into an
   edge of w pixels on every side, so motion compensation may safely read
   slightly outside the picture. */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *const bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int y;

    /* replicate the first row upward and the last row downward */
    for (y = 1; y <= w; y++) {
        memcpy(buf    - y * wrap, buf,    width);
        memcpy(bottom + y * wrap, bottom, width);
    }

    /* replicate the first and last column of every row sideways */
    for (y = 0, row = buf; y < height; y++, row += wrap) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
    }

    /* fill the four w x w corner squares from the corner pixels */
    for (y = 1; y <= w; y++) {
        memset(buf    - y * wrap - w,     buf[0],            w); /* top left */
        memset(buf    - y * wrap + width, buf[width - 1],    w); /* top right */
        memset(bottom + y * wrap - w,     bottom[0],         w); /* bottom left */
        memset(bottom + y * wrap + width, bottom[width - 1], w); /* bottom right */
    }
}
1407
1408 int ff_find_unused_picture(MpegEncContext *s, int shared){
1409     int i;
1410     
1411     if(shared){
1412         for(i=0; i<MAX_PICTURE_COUNT; i++){
1413             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1414         }
1415     }else{
1416         for(i=0; i<MAX_PICTURE_COUNT; i++){
1417             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1418         }
1419         for(i=0; i<MAX_PICTURE_COUNT; i++){
1420             if(s->picture[i].data[0]==NULL) return i;
1421         }
1422     }
1423
1424     assert(0);
1425     return -1;
1426 }
1427
1428 static void update_noise_reduction(MpegEncContext *s){
1429     int intra, i;
1430
1431     for(intra=0; intra<2; intra++){
1432         if(s->dct_count[intra] > (1<<16)){
1433             for(i=0; i<64; i++){
1434                 s->dct_error_sum[intra][i] >>=1;
1435             }
1436             s->dct_count[intra] >>= 1;
1437         }
1438         
1439         for(i=0; i<64; i++){
1440             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1441         }
1442     }
1443 }
1444
/**
 * generic function for encode/decode called after coding/decoding the header
 * and before a frame is coded/decoded.
 * Selects/allocates the current picture, rotates last/next picture pointers,
 * and picks the dequantizer matching the stream type.
 * @return 0 on success, negative on picture allocation failure
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures: reference frames that are neither the
           next picture nor about to become it (should not normally happen) */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
                }
            }
        }
    }
alloc:
    if(!s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        /* B frames (except in H.264) are never used as references;
           dropable frames are not kept either */
        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
                        && !s->dropable ? 3 : 0;

        pic->coded_picture_number= s->coded_picture_number++;
        
        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE) 
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);
  
  /* reference rotation and field handling; H.264 manages its own references */
  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL, 
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL, 
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/
    
    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
    
    /* a non-I frame with no reference available: retry after allocating
       a (gray) picture so decoding can still proceed */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we dont have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: double the line stride and, for the bottom field,
       start one line down, so the field is addressed like a frame */
    if(s->picture_structure!=PICT_FRAME){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            } 
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }
  }
   
    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we cant do it during init as it might change for mpeg4
       and we cant do it in the header decode as init isnt called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    /* dct_error_sum is only allocated when encoding with noise reduction */
    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }
        
#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
1571
/* generic function for encode/decode called after a frame has been coded/decoded */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    /* pad the reconstructed frame by replicating its border pixels so that
       unrestricted motion vectors can reference pixels outside the image;
       skipped for non-reference frames and when the edges are emulated */
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    /* leave the FPU in a sane state after possible MMX use in draw_edges() */
    emms_c();
    
    /* remember picture types for the next frame's coding decisions */
    s->last_pict_type    = s->pict_type;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }    
    }
    assert(i<MAX_PICTURE_COUNT);
#endif    

    if(s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1621
/**
 * draws a line from (ex, ey) -> (sx, sy) with simple two-pixel
 * anti-aliasing using 16.16 fixed-point interpolation.
 * Endpoints are clamped into the image first.
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color (added to the existing pixel values)
 */
static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int t, x, y, fr, f;
    int adx, ady;

    /* clamp both endpoints into [0, w-1] x [0, h-1] */
    sx= sx < 0 ? 0 : (sx > w-1 ? w-1 : sx);
    sy= sy < 0 ? 0 : (sy > h-1 ? h-1 : sy);
    ex= ex < 0 ? 0 : (ex > w-1 ? w-1 : ex);
    ey= ey < 0 ? 0 : (ey > h-1 ? h-1 : ey);

    buf[sy*stride + sx]+= color;

    adx= ex > sx ? ex - sx : sx - ex;
    ady= ey > sy ? ey - sy : sy - ey;
    if(adx > ady){
        /* x-major: step along x, interpolate y */
        if(sx > ex){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ex-= sx;
        f= ((ey-sy)<<16)/ex;    /* 16.16 slope; ex > 0 since adx > ady >= 0 */
        for(x= 0; x <= ex; x++){
            y = (x*f)>>16;
            fr= (x*f)&0xFFFF;
            buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
            /* bugfix: the anti-aliasing partner pixel sits one row below;
               skip it when that row would be outside the image */
            if(sy + y + 1 < h)
                buf[(y+1)*stride + x]+= (color*         fr )>>16;
        }
    }else{
        /* y-major: step along y, interpolate x */
        if(sy > ey){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ey-= sy;
        if(ey) f= ((ex-sx)<<16)/ey;
        else   f= 0;            /* degenerate single-point line */
        for(y= 0; y <= ey; y++){
            x = (y*f)>>16;
            fr= (y*f)&0xFFFF;
            buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
            /* bugfix: partner pixel is one column right; bound-check it */
            if(sx + x + 1 < w)
                buf[y*stride + x+1]+= (color*         fr )>>16;
        }
    }
}
1670
/**
 * draws an arrow from (ex, ey) -> (sx, sy).
 * The shaft is always drawn; the two head strokes are added only when
 * the arrow is longer than 3 pixels.
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int vx, vy;

    /* keep the coordinates within a generous margin around the image;
       draw_line() clamps exactly afterwards */
    sx= clip(sx, -100, w+100);
    sy= clip(sy, -100, h+100);
    ex= clip(ex, -100, w+100);
    ey= clip(ey, -100, h+100);

    vx= ex - sx;
    vy= ey - sy;

    if(vx*vx + vy*vy > 3*3){
        /* head strokes: the shaft direction rotated by +/-45 degrees,
           normalized to a fixed length */
        int hx=  vx + vy;
        int hy= -vx + vy;
        int len= ff_sqrt((hx*hx + hy*hy)<<8);

        //FIXME subpixel accuracy
        hx= ROUNDED_DIV(hx*3<<4, len);
        hy= ROUNDED_DIV(hy*3<<4, len);

        draw_line(buf, sx, sy, sx + hx, sy + hy, w, h, stride, color);
        draw_line(buf, sx, sy, sx - hy, sy + hx, w, h, stride, color);
    }
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1703
/**
 * prints debuging info for the given picture.
 * Depending on s->avctx->debug and s->avctx->debug_mv this either logs a
 * textual per-macroblock map (skip count / qscale / mb type), or draws
 * motion vectors, qp and mb-type visualizations into a private copy of
 * the frame (s->visualization_buffer), redirecting pict->data[] to it.
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    /* ---- textual per-macroblock dump to the log ---- */
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    /* consecutive-skip count, saturated to one digit */
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9;
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    /* first char: macroblock type & prediction direction */
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    /* second char: partitioning / segmentation */
                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "¦");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    /* third char: interlaced flag (H.264 only) */
                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* ---- visual debug: draw into a private copy of the frame ---- */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        /* redirect the output frame to a scratch copy so the reference
           buffers stay untouched */
        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                /* motion vectors: up to three passes - P forward, B forward,
                   B backward - each gated by its own debug_mv flag */
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    /* one arrow per partition, anchored at the partition center */
                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                /* qp visualization: paint the chroma planes with a gray
                   level proportional to the macroblock qscale */
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                /* mb-type visualization: color-code the chroma planes per
                   macroblock type and XOR partition boundaries into luma */
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
/* maps a hue angle (degrees) and saturation radius to U/V chroma values */
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128;
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    /* replicate the chroma byte across a whole 8-pixel row */
                    u*= 0x0101010101010101ULL;
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            /* mark sub-partitions only where the MVs differ */
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
1964
1965 #ifdef CONFIG_ENCODERS
1966
/**
 * returns the sum of absolute differences between a 16x16 block
 * and a constant reference value.
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int row, col;
    int sum= 0;

    for(row= 0; row < 16; row++){
        const uint8_t *line= src + row*stride;
        for(col= 0; col < 16; col++){
            const int d= line[col] - ref;
            sum+= d >= 0 ? d : -d;
        }
    }

    return sum;
}
1979
1980 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1981     int x, y, w, h;
1982     int acc=0;
1983     
1984     w= s->width &~15;
1985     h= s->height&~15;
1986     
1987     for(y=0; y<h; y+=16){
1988         for(x=0; x<w; x+=16){
1989             int offset= x + y*stride;
1990             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1991             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1992             int sae = get_sae(src + offset, mean, stride);
1993             
1994             acc+= sae + 500 < sad;
1995         }
1996     }
1997     return acc;
1998 }
1999
2000
/**
 * queues an input frame for encoding.
 * Validates/derives its pts, then either wraps the caller's buffers
 * directly (when strides match and the input is preserved) or copies the
 * pixel data into an internally allocated Picture, and finally appends
 * the picture to the s->input_picture[] reorder FIFO.
 * @return 0 on success, -1 on a non-monotone timestamp
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1;

    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        if(pts != AV_NOPTS_VALUE){
            /* timestamps must be strictly increasing */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;

                if(time <= last){
                    /* NOTE(review): %Ld is a GNU-specific length modifier;
                       PRId64 from <inttypes.h> would be portable */
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%Ld, last=%Ld\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            /* no pts supplied: extrapolate from the previous one, or fall
               back to the display picture number */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                s->user_specified_pts=
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%Ld)\n", pts);
            }else{
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    /* "direct" = reuse the caller's buffers without copying; only possible
       when there is no B-frame delay (or the input is preserved) and the
       strides match our internal layout */
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        /* offset of the image inside the (edge-padded) internal buffer;
           presumably tied to the allocator's edge padding -- TODO confirm */
        int offset= 16;
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        if(   pic->data[0] + offset == pic_arg->data[0]
           && pic->data[1] + offset == pic_arg->data[1]
           && pic->data[2] + offset == pic_arg->data[2]){
       // empty -- input already lives in our buffer, no copy needed
        }else{
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            /* copy each plane, row by row when the strides differ */
            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i] + offset;

                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifying pic_arg
  }

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    /* NULL when pic_arg was NULL (end of stream flush) */
    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
2104
2105 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2106     int x, y, plane;
2107     int score=0;
2108     int64_t score64=0;
2109
2110     for(plane=0; plane<3; plane++){
2111         const int stride= p->linesize[plane];
2112         const int bw= plane ? 1 : 2;
2113         for(y=0; y<s->mb_height*bw; y++){
2114             for(x=0; x<s->mb_width*bw; x++){
2115                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride), ref->data[plane] + 8*(x + y*stride), stride, 8);
2116                 
2117                 switch(s->avctx->frame_skip_exp){
2118                     case 0: score= FFMAX(score, v); break;
2119                     case 1: score+= ABS(v);break;
2120                     case 2: score+= v*v;break;
2121                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2122                     case 4: score64+= v*v*(int64_t)(v*v);break;
2123                 }
2124             }
2125         }
2126     }
2127     
2128     if(score) score64= score;
2129
2130     if(score64 < s->avctx->frame_skip_threshold)
2131         return 1;
2132     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2133         return 1;
2134     return 0;
2135 }
2136
/**
 * selects the next picture to be encoded and determines its coding type.
 * Shifts the reorder FIFO, optionally drops the frame (frame skipping),
 * decides how many B frames precede the next reference frame (display ->
 * coded order reordering), and sets up s->new_picture /
 * s->current_picture for the encoder.
 */
static void select_input_picture(MpegEncContext *s){
    int i;

    /* advance the reorder FIFO by one slot */
    for(i=1; i<MAX_PICTURE_COUNT; i++)
        s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
    s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;

    /* set next picture types & ordering */
    if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
        /* no reference yet (or intra-only codec): force an I frame */
        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
            s->reordered_input_picture[0]= s->input_picture[0];
            s->reordered_input_picture[0]->pict_type= I_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
        }else{
            int b_frames;

            /* frame skipping: drop the frame entirely when it is close
               enough to the last reference */
            if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
                if(skip_check(s, s->input_picture[0], s->next_picture_ptr)){
//av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);

                    if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
                        /* shared buffer: just detach our pointers */
                        for(i=0; i<4; i++)
                            s->input_picture[0]->data[i]= NULL;
                        s->input_picture[0]->type= 0;
                    }else{
                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
                               || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

                        s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
                    }

                    goto no_output_pic;
                }
            }

            /* 2nd pass: take the picture types decided in the first pass
               from the rate-control log */
            if(s->flags&CODEC_FLAG_PASS2){
                for(i=0; i<s->max_b_frames+1; i++){
                    int pict_num= s->input_picture[0]->display_picture_number + i;

                    if(pict_num >= s->rc_context.num_entries)
                        break;
                    if(!s->input_picture[i]){
                        s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
                        break;
                    }

                    s->input_picture[i]->pict_type=
                        s->rc_context.entry[pict_num].new_pict_type;
                }
            }

            /* choose the number of B frames before the next reference */
            if(s->avctx->b_frame_strategy==0){
                /* fixed: as many as are buffered, up to max_b_frames */
                b_frames= s->max_b_frames;
                while(b_frames && !s->input_picture[b_frames]) b_frames--;
            }else if(s->avctx->b_frame_strategy==1){
                /* adaptive: stop at the first frame that looks like a
                   scene change (high intra count) */
                for(i=1; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
                        s->input_picture[i]->b_frame_score=
                            get_intra_count(s, s->input_picture[i  ]->data[0],
                                               s->input_picture[i-1]->data[0], s->linesize) + 1;
                    }
                }
                for(i=0; i<s->max_b_frames; i++){
                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
                }

                b_frames= FFMAX(0, i-1);

                /* reset scores */
                for(i=0; i<b_frames+1; i++){
                    s->input_picture[i]->b_frame_score=0;
                }
            }else{
                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
                b_frames=0;
            }

            emms_c();
//static int b_count=0;
//b_count+= b_frames;
//av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);

            /* a pre-assigned non-B type (e.g. from pass 2) cuts the B run */
            for(i= b_frames - 1; i>=0; i--){
                int type= s->input_picture[i]->pict_type;
                if(type && type != B_TYPE)
                    b_frames= i;
            }
            if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
                av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
            }

            /* GOP boundary handling: force/shift the next I frame */
            if(s->picture_in_gop_number + b_frames >= s->gop_size){
              if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
                    b_frames= s->gop_size - s->picture_in_gop_number - 1;
              }else{
                if(s->flags & CODEC_FLAG_CLOSED_GOP)
                    b_frames=0;
                s->input_picture[b_frames]->pict_type= I_TYPE;
              }
            }

            /* a closed GOP must not have B frames referencing across an I */
            if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
               && b_frames
               && s->input_picture[b_frames]->pict_type== I_TYPE)
                b_frames--;

            /* emit: reference frame first, then its preceding B frames */
            s->reordered_input_picture[0]= s->input_picture[b_frames];
            if(s->reordered_input_picture[0]->pict_type != I_TYPE)
                s->reordered_input_picture[0]->pict_type= P_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
            for(i=0; i<b_frames; i++){
                s->reordered_input_picture[i+1]= s->input_picture[i];
                s->reordered_input_picture[i+1]->pict_type= B_TYPE;
                s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
            }
        }
    }
no_output_pic:
    if(s->reordered_input_picture[0]){
        /* B frames are not used as references */
        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;

        copy_picture(&s->new_picture, s->reordered_input_picture[0]);

        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
            // input is a shared pix, so we cant modify it -> alloc a new one & ensure that the shared one is reuseable

            int i= ff_find_unused_picture(s, 0);
            Picture *pic= &s->picture[i];

            /* mark us unused / free shared pic */
            for(i=0; i<4; i++)
                s->reordered_input_picture[0]->data[i]= NULL;
            s->reordered_input_picture[0]->type= 0;

            pic->reference              = s->reordered_input_picture[0]->reference;

            alloc_picture(s, pic, 0);

            copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);

            s->current_picture_ptr= pic;
        }else{
            // input is not a shared pix -> reuse buffer for current_pix

            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
                   || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

            s->current_picture_ptr= s->reordered_input_picture[0];
            /* skip the 16-pixel edge inside the internal buffer;
               presumably matches load_input_picture()'s offset -- TODO confirm */
            for(i=0; i<4; i++){
                s->new_picture.data[i]+=16;
            }
        }
        copy_picture(&s->current_picture, s->current_picture_ptr);

        s->picture_number= s->new_picture.display_picture_number;
//printf("dpn:%d\n", s->picture_number);
    }else{
       memset(&s->new_picture, 0, sizeof(Picture));
    }
}
2297
2298 int MPV_encode_picture(AVCodecContext *avctx,
2299                        unsigned char *buf, int buf_size, void *data)
2300 {
2301     MpegEncContext *s = avctx->priv_data;
2302     AVFrame *pic_arg = data;
2303     int i, stuffing_count;
2304
2305     if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
2306         av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
2307         return -1;
2308     }
2309     
2310     for(i=0; i<avctx->thread_count; i++){
2311         int start_y= s->thread_context[i]->start_mb_y;
2312         int   end_y= s->thread_context[i]->  end_mb_y;
2313         int h= s->mb_height;
2314         uint8_t *start= buf + buf_size*start_y/h;
2315         uint8_t *end  = buf + buf_size*  end_y/h;
2316
2317         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2318     }
2319
2320     s->picture_in_gop_number++;
2321
2322     if(load_input_picture(s, pic_arg) < 0)
2323         return -1;
2324     
2325     select_input_picture(s);
2326     
2327     /* output? */
2328     if(s->new_picture.data[0]){
2329         s->pict_type= s->new_picture.pict_type;
2330 //emms_c();
2331 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2332         MPV_frame_start(s, avctx);
2333
2334         encode_picture(s, s->picture_number);
2335         
2336         avctx->real_pict_num  = s->picture_number;
2337         avctx->header_bits = s->header_bits;
2338         avctx->mv_bits     = s->mv_bits;
2339         avctx->misc_bits   = s->misc_bits;
2340         avctx->i_tex_bits  = s->i_tex_bits;
2341         avctx->p_tex_bits  = s->p_tex_bits;
2342         avctx->i_count     = s->i_count;
2343         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2344         avctx->skip_count  = s->skip_count;
2345
2346         MPV_frame_end(s);
2347
2348         if (s->out_format == FMT_MJPEG)
2349             mjpeg_picture_trailer(s);
2350         
2351         if(s->flags&CODEC_FLAG_PASS1)
2352             ff_write_pass1_stats(s);
2353
2354         for(i=0; i<4; i++){
2355             avctx->error[i] += s->current_picture_ptr->error[i];
2356         }
2357
2358         if(s->flags&CODEC_FLAG_PASS1)
2359             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2360         flush_put_bits(&s->pb);
2361         s->frame_bits  = put_bits_count(&s->pb);
2362
2363         stuffing_count= ff_vbv_update(s, s->frame_bits);
2364         if(stuffing_count){
2365             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2366                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2367                 return -1;
2368             }
2369
2370             switch(s->codec_id){
2371             case CODEC_ID_MPEG1VIDEO:
2372             case CODEC_ID_MPEG2VIDEO:
2373                 while(stuffing_count--){
2374                     put_bits(&s->pb, 8, 0);
2375                 }
2376             break;
2377             case CODEC_ID_MPEG4:
2378                 put_bits(&s->pb, 16, 0);
2379                 put_bits(&s->pb, 16, 0x1C3);
2380                 stuffing_count -= 4;
2381                 while(stuffing_count--){
2382                     put_bits(&s->pb, 8, 0xFF);
2383                 }
2384             break;
2385             default:
2386                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2387             }
2388             flush_put_bits(&s->pb);
2389             s->frame_bits  = put_bits_count(&s->pb);
2390         }
2391
2392         /* update mpeg1/2 vbv_delay for CBR */    
2393         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2394            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2395             int vbv_delay;
2396
2397             assert(s->repeat_first_field==0);
2398             
2399             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2400             assert(vbv_delay < 0xFFFF);
2401
2402             s->vbv_delay_ptr[0] &= 0xF8;
2403             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2404             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2405             s->vbv_delay_ptr[2] &= 0x07;
2406             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2407         }
2408         s->total_bits += s->frame_bits;
2409         avctx->frame_bits  = s->frame_bits;
2410     }else{
2411         assert((pbBufPtr(&s->pb) == s->pb.buf));
2412         s->frame_bits=0;
2413     }
2414     assert((s->frame_bits&7)==0);
2415     
2416     return s->frame_bits/8;
2417 }
2418
2419 #endif //CONFIG_ENCODERS
2420
/**
 * Motion compensation of one macroblock using MPEG-4 GMC with a single
 * sprite point (pure translation), dispatched to dsp.gmc1().
 *
 * @param s           encoder/decoder context (sprite_offset and
 *                    sprite_warping_accuracy must be set up)
 * @param dest_y      destination luma block (16x16)
 * @param dest_cb     destination Cb block (8x8)
 * @param dest_cr     destination Cr block (8x8)
 * @param ref_picture reference planes [Y, Cb, Cr]
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0;

    /* integer source position; the offset is in units of
       1/(2<<sprite_warping_accuracy) pel, hence the shift */
    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    /* rescale the fractional part to 1/16-pel for dsp.gmc1() */
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    /* clamp to the picture; at the exact right/bottom edge the fractional
       part must be dropped so no samples beyond the edge are read */
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;
    src_y = clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* source 17x17 window may poke outside the padded edges:
           build a replicated-border copy first */
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }

    if((motion_x|motion_y)&7){
        /* true sub-pel offset: bilinear interpolation, two 8x16 halves */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        /* offset is a multiple of 8/16-pel, i.e. full- or half-pel:
           plain (no-)rounding half-pel copy is enough */
        int dxy;

        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* chroma: same procedure at half resolution with the second
       sprite offset */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1; /* remember: Cr needs the same edge emulation */
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    return;
}
2506
2507 static inline void gmc_motion(MpegEncContext *s,
2508                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2509                                uint8_t **ref_picture)
2510 {
2511     uint8_t *ptr;
2512     int linesize, uvlinesize;
2513     const int a= s->sprite_warping_accuracy;
2514     int ox, oy;
2515
2516     linesize = s->linesize;
2517     uvlinesize = s->uvlinesize;
2518
2519     ptr = ref_picture[0];
2520
2521     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2522     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2523
2524     s->dsp.gmc(dest_y, ptr, linesize, 16,
2525            ox, 
2526            oy, 
2527            s->sprite_delta[0][0], s->sprite_delta[0][1],
2528            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2529            a+1, (1<<(2*a+1)) - s->no_rounding,
2530            s->h_edge_pos, s->v_edge_pos);
2531     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2532            ox + s->sprite_delta[0][0]*8, 
2533            oy + s->sprite_delta[1][0]*8, 
2534            s->sprite_delta[0][0], s->sprite_delta[0][1],
2535            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2536            a+1, (1<<(2*a+1)) - s->no_rounding,
2537            s->h_edge_pos, s->v_edge_pos);
2538
2539     if(s->flags&CODEC_FLAG_GRAY) return;
2540
2541     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2542     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2543
2544     ptr = ref_picture[1];
2545     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2546            ox, 
2547            oy, 
2548            s->sprite_delta[0][0], s->sprite_delta[0][1],
2549            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2550            a+1, (1<<(2*a+1)) - s->no_rounding,
2551            s->h_edge_pos>>1, s->v_edge_pos>>1);
2552     
2553     ptr = ref_picture[2];
2554     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2555            ox, 
2556            oy, 
2557            s->sprite_delta[0][0], s->sprite_delta[0][1],
2558            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2559            a+1, (1<<(2*a+1)) - s->no_rounding,
2560            s->h_edge_pos>>1, s->v_edge_pos>>1);
2561 }
2562
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int top, left, bottom, right; /* bounds of the valid region inside the block */

    /* clamp the block position so that at least one source row/column
       overlaps the picture; src already points at the (possibly
       out-of-picture) block origin, so shift it accordingly */
    if(src_y >= h){
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    }else if(src_y <= -block_h){
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if(src_x >= w){
        src  += w - 1 - src_x;
        src_x = w - 1;
    }else if(src_x <= -block_w){
        src  += 1 - block_w - src_x;
        src_x = 1 - block_w;
    }

    top    = -src_x > 0 ? 0 : 0;                    /* placeholder, set below */
    top    = -src_y > 0 ? -src_y : 0;               /* == FFMAX(0, -src_y) */
    left   = -src_x > 0 ? -src_x : 0;               /* == FFMAX(0, -src_x) */
    bottom = h - src_y < block_h ? h - src_y : block_h; /* == FFMIN(block_h, h-src_y) */
    right  = w - src_x < block_w ? w - src_x : block_w; /* == FFMIN(block_w, w-src_x) */

    /* copy the part of the block that overlaps the source picture */
    for(y = top; y < bottom; y++)
        for(x = left; x < right; x++)
            buf[y*linesize + x] = src[y*linesize + x];

    /* replicate the topmost valid row upwards */
    for(y = 0; y < top; y++)
        for(x = left; x < right; x++)
            buf[y*linesize + x] = buf[top*linesize + x];

    /* replicate the bottommost valid row downwards */
    for(y = bottom; y < block_h; y++)
        for(x = left; x < right; x++)
            buf[y*linesize + x] = buf[(bottom - 1)*linesize + x];

    /* replicate the leftmost/rightmost valid columns sideways,
       covering the corners as well */
    for(y = 0; y < block_h; y++){
        for(x = 0; x < left; x++)
            buf[y*linesize + x] = buf[y*linesize + left];
        for(x = right; x < block_w; x++)
            buf[y*linesize + x] = buf[y*linesize + right - 1];
    }
}
2633
/**
 * Half-pel motion compensation of a single w x h block.
 *
 * @param s            codec context (linesize, unrestricted_mv, flags, edge
 *                     emulation buffer are read)
 * @param dest         destination block
 * @param src          top-left of the reference plane (block offset is
 *                     computed here from src_x/src_y and the motion vector)
 * @param field_based  1 for field prediction (doubles the emulated height)
 * @param field_select selects the bottom field of the reference when set
 * @param src_x,src_y  position of the block inside the reference plane
 * @param width,height clipping limits for the source position
 * @param stride       line size of src and dest
 * @param h_edge_pos,v_edge_pos edge positions used for edge emulation
 * @param w,h          block dimensions
 * @param pix_op       put/avg pixel operations, indexed by half-pel dxy
 * @param motion_x,motion_y motion vector in half-pel units
 * @return 1 if the edge-emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, op_pixels_func *pix_op,
                                  int motion_x, int motion_y)
{
    int dxy;
    int emu=0;

    /* pack the two half-pel flags into the pix_op index */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x += motion_x >> 1;
    src_y += motion_y >> 1;

    /* WARNING: do no forget half pels */
    /* clamp to the picture; at the exact edge the half-pel flag must be
       cleared so no sample beyond the edge is read */
    src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
    if (src_x == width)
        dxy &= ~1;
    src_y = clip(src_y, -16, height);
    if (src_y == height)
        dxy &= ~2;
    src += src_y * stride + src_x;

    if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
        if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
           || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
            /* NOTE(review): this passes s->v_edge_pos rather than the
               v_edge_pos parameter; src_y is shifted back to frame
               coordinates first, so presumably the full-frame edge is
               intended — confirm against field-based callers */
            ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                             src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
            src= s->edge_emu_buffer;
            emu=1;
        }
    }
    if(field_select)
        src += s->linesize;
    pix_op[dxy](dest, src, stride, h);
    return emu;
}
2673
/**
 * Half-pel motion compensation of a single w x h block in lowres
 * (downscaled) decoding, using the H.264 chroma MC functions as a
 * generic bilinear interpolator.
 *
 * @param s            codec context (avctx->lowres, quarter_sample,
 *                     linesize and the edge emulation buffer are read)
 * @param dest         destination block
 * @param src          top-left of the reference plane
 * @param field_based  1 for field prediction
 * @param field_select selects the bottom field of the reference when set
 * @param src_x,src_y  block position inside the reference plane
 * @param width,height unused here (kept for signature parity with hpel_motion)
 * @param stride       line size of src and dest
 * @param h_edge_pos,v_edge_pos edge positions (already lowres-scaled by callers)
 * @param w,h          block dimensions
 * @param pix_op       bilinear MC functions indexed by lowres level
 * @param motion_x,motion_y motion vector in half-pel units
 * @return 1 if the edge-emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    /* fractional-position mask: 2<<lowres sub-positions per full sample */
    const int s_mask= (2<<lowres)-1;
    int emu=0;
    int sx, sy;

    if(s->quarter_sample){
        /* quarter-pel precision is folded down to half-pel in lowres */
        motion_x/=2;
        motion_y/=2;
    }

    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    /* rescale the fractional part to the 1/8 units pix_op expects;
       assumes lowres <= 2 here, otherwise the shift count is negative
       — TODO confirm upper bound of avctx->lowres */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    if(field_select)
        src += s->linesize;
    pix_op[lowres](dest, src, stride, h, sx, sy);
    return emu;
}
2715
/**
 * Applies one MPEG motion vector to all three components (Y, Cb, Cr)
 * of a macroblock, with per-format chroma vector derivation.
 *
 * @param s            codec context
 * @param dest_y,dest_cb,dest_cr destination blocks
 * @param field_based  1 for field prediction (halves effective height,
 *                     doubles the line size)
 * @param bottom_field write into the bottom field of the destination
 * @param field_select read from the bottom field of the reference
 * @param ref_picture  reference planes [Y, Cb, Cr]
 * @param pix_op       put/avg pixel operations [chroma_x_shift][dxy]
 * @param motion_x,motion_y motion vector in half-pel units
 * @param h            height in luma lines to process
 */
static always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;

#if 0
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* dxy packs the two half-pel flags as the pix_op index */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    /* derive the chroma vector/position; the rounding rules differ
       per output format */
    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* replicate a known encoder bug: x is rounded away from
               zero, y is truncated */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* 4:2:0 */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* unsigned compare catches negative positions too */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                /* MPEG-1/2 vectors must not leave the picture: drop the MC */
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            /* build replicated-border copies for all three planes */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
    if(s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
#endif
}
2832
/**
 * Applies one MPEG motion vector to all three components (Y, Cb, Cr)
 * of a macroblock in lowres (downscaled) decoding; the lowres variant
 * of mpeg_motion().
 *
 * @param s            codec context (avctx->lowres selects the scale)
 * @param dest_y,dest_cb,dest_cr destination blocks
 * @param field_based  1 for field prediction
 * @param bottom_field write into the bottom field of the destination
 * @param field_select read from the bottom field of the reference
 * @param ref_picture  reference planes [Y, Cb, Cr]
 * @param pix_op       bilinear MC functions indexed by lowres level
 * @param motion_x,motion_y motion vector in half-pel units
 * @param h            height in (full-resolution) luma lines to process
 */
static always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;           /* lowres half-block size */
    const int s_mask= (2<<lowres)-1;        /* fractional-position mask */
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        /* compensate for the field offset introduced by downscaling */
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    /* chroma vector derivation, per output format (cf. mpeg_motion) */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* unsigned compare catches negative positions too */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale the fractional parts to the 1/8 units pix_op expects;
       assumes lowres <= 2 — TODO confirm upper bound of avctx->lowres */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
2929
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Blends five 8x8 prediction blocks (mid/top/left/right/bottom) into one
 * 8x8 output block using fixed OBMC weights. The weights for every pixel
 * sum to 8; the +4 provides rounding before the >>3 normalization.
 *
 * @param dst    destination 8x8 block
 * @param src    the five prediction blocks: [0]=mid, [1]=top, [2]=left,
 *               [3]=right, [4]=bottom
 * @param stride line size of dst and all src blocks
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
/* weighted blend of one pixel: t,l,m,r,b are the per-source weights */
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
/* same weights applied to a 2x2 group of pixels */
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    /* hand-tuned weight table; rows/columns nearer an edge give more
       weight to the neighbouring block on that side */
    x=0;
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    x+= stride;
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    x+= stride;
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    x+= 2*stride;
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    x+= 2*stride;
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    x+= stride;
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
}
2981
2982 /* obmc for 1 8x8 luma block */
2983 static inline void obmc_motion(MpegEncContext *s,
2984                                uint8_t *dest, uint8_t *src,
2985                                int src_x, int src_y,
2986                                op_pixels_func *pix_op,
2987                                int16_t mv[5][2]/* mid top left right bottom*/)
2988 #define MID    0
2989 {
2990     int i;
2991     uint8_t *ptr[5];
2992     
2993     assert(s->quarter_sample==0);
2994     
2995     for(i=0; i<5; i++){
2996         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
2997             ptr[i]= ptr[MID];
2998         }else{
2999             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3000             hpel_motion(s, ptr[i], src, 0, 0,
3001                         src_x, src_y,
3002                         s->width, s->height, s->linesize,
3003                         s->h_edge_pos, s->v_edge_pos,
3004                         8, 8, pix_op,
3005                         mv[i][0], mv[i][1]);
3006         }
3007     }
3008
3009     put_obmc(dest, ptr, s->linesize);                
3010 }
3011
/**
 * Quarter-pel motion compensation of one macroblock: qpel interpolation
 * for luma, half-pel for chroma (with several encoder-bug workarounds
 * for the chroma vector rounding).
 *
 * @param s            codec context
 * @param dest_y,dest_cb,dest_cr destination blocks
 * @param field_based  1 for field prediction
 * @param bottom_field write into the bottom field of the destination
 * @param field_select read from the bottom field of the reference
 * @param ref_picture  reference planes [Y, Cb, Cr]
 * @param pix_op       half-pel put/avg operations (used for chroma)
 * @param qpix_op      quarter-pel operations (used for luma)
 * @param motion_x,motion_y motion vector in quarter-pel units
 * @param h            height in luma lines to process
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    /* dxy packs both quarter-pel fractions as the qpix_op index */
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    /* derive the half-pel chroma vector; several rounding variants exist
       to match buggy encoders */
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    /* round away from zero to half-pel precision */
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* unsigned compare catches negative positions too */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3098
3099 inline int ff_h263_round_chroma(int x){
3100     if (x >= 0)
3101         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3102     else {
3103         x = -x;
3104         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3105     }
3106 }
3107
/**
 * h263 chroma 4mv motion compensation.
 * Derives one half-pel chroma vector from the sum of the four luma
 * vectors (already summed by the caller into mx/my) and compensates
 * both 8x8 chroma planes.
 */
static inline void chroma_4mv_motion(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     op_pixels_func *pix_op,
                                     int mx, int my){
    int dxy, emu=0, src_x, src_y, offset;
    uint8_t *ptr;
    
    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);
    
    /* half-pel phase (bit0: horizontal, bit1: vertical) */
    dxy = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    /* clamp to the padded picture; at the exact right/bottom limit the
       half-pel interpolation in that direction must be disabled */
    src_x = clip(src_x, -8, s->width/2);
    if (src_x == s->width/2)
        dxy &= ~1;
    src_y = clip(src_y, -8, s->height/2);
    if (src_y == s->height/2)
        dxy &= ~2;
    
    offset = (src_y * (s->uvlinesize)) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* out-of-picture read: replicate edges into edge_emu_buffer */
        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    /* Cr uses the same offset/emulation decision as Cb */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}
3156
/**
 * Lowres variant of chroma_4mv_motion: compensates both chroma planes of
 * a 4MV macroblock at reduced resolution (block size 8>>lowres) using the
 * H.264-style chroma MC functions, which take explicit sub-pel fractions.
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;
    const int s_mask= (2<<lowres)-1;          /* mask of sub-pel bits at this lowres level */
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;
    
    /* quarter-sample vectors are first reduced to half-sample precision */
    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);
    
    /* split into integer position (src_x/src_y) and sub-pel fraction (sx/sy) */
    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));
    
    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* out-of-picture read: replicate edges into edge_emu_buffer */
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }     
    /* scale the fraction to the 1/8-pel range the h264 chroma MC expects */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
          
    /* Cr uses the same offset/emulation decision as Cb */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
3206
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion(MpegEncContext *s, 
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture, 
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    /* Overlapped block motion compensation (H.263 advanced prediction):
       build a 4x4 cache of the neighboring 8x8 motion vectors, then
       compensate each 8x8 luma block blended with its neighbors. */
    if(s->obmc && s->pict_type != B_TYPE){
        int16_t mv_cache[4][4][2];
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
        const int mot_stride= s->b8_stride;
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;

        assert(!s->mb_skipped);
                
        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
        /* the row below is intentionally a mirror of the bottom row
           (NOTE(review): presumably because the MB below is not decoded yet) */
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);

        /* above neighbor: mirror if at the top edge or the neighbor is intra */
        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
        }else{
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
        }

        /* left neighbor (both int16 components copied as one int32) */
        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
        }else{
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
        }

        /* right neighbor */
        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
        }else{
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
        }
        
        mx = 0;
        my = 0;
        for(i=0;i<4;i++) {
            const int x= (i&1)+1;
            const int y= (i>>1)+1;
            /* center, top, left, right, bottom vectors for this 8x8 block */
            int16_t mv[5][2]= {
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
            //FIXME cleanup
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                        pix_op[1],
                        mv);

            /* accumulate the center vectors for the chroma 4MV rounding */
            mx += mv[0][0];
            my += mv[0][1];
        }
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);

        return;
    }
   
    switch(s->mv_type) {
    case MV_TYPE_16X16:
        if(s->mcsel){
            /* global motion compensation (sprite warping) */
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr, 
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else if(s->mspel){
            /* WMV2 special half-pel mode */
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else
        {
            mpeg_motion(s, dest_y, dest_cb, dest_cr, 
                        0, 0, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }           
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            /* four 8x8 luma blocks, each with its own quarter-pel vector */
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
                    
                /* WARNING: do not forget half pels */
                src_x = clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;
                    
                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8 
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                /* sum half-sample vectors for the chroma rounding below */
                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                            ref_picture[0], 0, 0,
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos, s->v_edge_pos,
                            8, 8, pix_op[1],
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* frame picture: one 16x8 compensation per field */
            if(s->quarter_sample){
                for(i=0; i<2; i++){
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
                }
            }else{
                /* top field */       
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 0, s->field_select[dir][0],
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 1, s->field_select[dir][1],
                            ref_picture, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        } else {
            /* field picture: a second field may reference the opposite
               parity field of the picture currently being decoded */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            } 

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_16X8:
        /* two 16x8 partitions, upper (i=0) then lower (i=1) */
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            } 

            mpeg_motion(s, dest_y, dest_cb, dest_cr, 
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
                
            dest_y += 16*s->linesize;
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
        }        
        break;
    case MV_TYPE_DMV:
        /* dual prime: average the same-parity and opposite-parity predictions */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                }
                pix_op = s->dsp.avg_pixels_tab; 
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion(s, dest_y, dest_cb, dest_cr, 
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);

                // after put we make avg of the same block
                pix_op=s->dsp.avg_pixels_tab; 

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;    
                }
            }
        }
    break;
    default: assert(0);
    }
}
3455
/**
 * motion compensation of a single macroblock, lowres variant
 * (operates at reduced resolution: block size 8>>s->avctx->lowres)
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op h264-style chroma motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s, 
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture, 
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;    

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        /* four sub-blocks, each with its own vector; sum the vectors
           for the single chroma vector derived below */
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */       
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* field picture: a second field may reference the opposite
               parity field of the picture currently being decoded */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            } 

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        /* two half-height partitions, upper (i=0) then lower (i=1) */
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            } 

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
                
            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }        
        break;
    case MV_TYPE_DMV:
        /* dual prime: average the same-parity and opposite-parity predictions */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;    
                }
            }
        }
    break;
    default: assert(0);
    }
}
3581
3582 /* put block[] to dest[] */
3583 static inline void put_dct(MpegEncContext *s, 
3584                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3585 {
3586     s->dct_unquantize_intra(s, block, i, qscale);
3587     s->dsp.idct_put (dest, line_size, block);
3588 }
3589
3590 /* add block[] to dest[] */
3591 static inline void add_dct(MpegEncContext *s, 
3592                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3593 {
3594     if (s->block_last_index[i] >= 0) {
3595         s->dsp.idct_add (dest, line_size, block);
3596     }
3597 }
3598
3599 static inline void add_dequant_dct(MpegEncContext *s, 
3600                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3601 {
3602     if (s->block_last_index[i] >= 0) {
3603         s->dct_unquantize_inter(s, block, i, qscale);
3604
3605         s->dsp.idct_add (dest, line_size, block);
3606     }
3607 }
3608
3609 /**
3610  * cleans dc, ac, coded_block for the current non intra MB
3611  */
3612 void ff_clean_intra_table_entries(MpegEncContext *s)
3613 {
3614     int wrap = s->b8_stride;
3615     int xy = s->block_index[0];
3616     
3617     s->dc_val[0][xy           ] = 
3618     s->dc_val[0][xy + 1       ] = 
3619     s->dc_val[0][xy     + wrap] =
3620     s->dc_val[0][xy + 1 + wrap] = 1024;
3621     /* ac pred */
3622     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3623     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3624     if (s->msmpeg4_version>=3) {
3625         s->coded_block[xy           ] =
3626         s->coded_block[xy + 1       ] =
3627         s->coded_block[xy     + wrap] =
3628         s->coded_block[xy + 1 + wrap] = 0;
3629     }
3630     /* chroma */
3631     wrap = s->mb_stride;
3632     xy = s->mb_x + s->mb_y * wrap;
3633     s->dc_val[1][xy] =
3634     s->dc_val[2][xy] = 1024;
3635     /* ac pred */
3636     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3637     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3638     
3639     s->mbintra_table[xy]= 0;
3640 }
3641
3642 /* generic function called after a macroblock has been parsed by the
3643    decoder or after it has been encoded by the encoder.
3644
3645    Important variables used:
3646    s->mb_intra : true if intra macroblock
3647    s->mv_dir   : motion vector direction
3648    s->mv_type  : motion vector type
3649    s->mv       : motion vector
3650    s->interlaced_dct : true if interlaced dct used (mpeg2)
3651  */
3652 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3653 {
3654     int mb_x, mb_y;
3655     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3656 #ifdef HAVE_XVMC
3657     if(s->avctx->xvmc_acceleration){
3658         XVMC_decode_mb(s);//xvmc uses pblocks
3659         return;
3660     }
3661 #endif
3662
3663     mb_x = s->mb_x;
3664     mb_y = s->mb_y;
3665
3666     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3667        /* save DCT coefficients */
3668        int i,j;
3669        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3670        for(i=0; i<6; i++)
3671            for(j=0; j<64; j++)
3672                *dct++ = block[i][s->dsp.idct_permutation[j]];
3673     }
3674
3675     s->current_picture.qscale_table[mb_xy]= s->qscale;
3676
3677     /* update DC predictors for P macroblocks */
3678     if (!s->mb_intra) {
3679         if (s->h263_pred || s->h263_aic) {
3680             if(s->mbintra_table[mb_xy])
3681                 ff_clean_intra_table_entries(s);
3682         } else {
3683             s->last_dc[0] =
3684             s->last_dc[1] =
3685             s->last_dc[2] = 128 << s->intra_dc_precision;
3686         }
3687     }
3688     else if (s->h263_pred || s->h263_aic)
3689         s->mbintra_table[mb_xy]=1;
3690
3691     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3692         uint8_t *dest_y, *dest_cb, *dest_cr;
3693         int dct_linesize, dct_offset;
3694         op_pixels_func (*op_pix)[4];
3695         qpel_mc_func (*op_qpix)[16];
3696         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3697         const int uvlinesize= s->current_picture.linesize[1];
3698         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3699         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3700
3701         /* avoid copy if macroblock skipped in last frame too */
3702         /* skip only during decoding as we might trash the buffers during encoding a bit */
3703         if(!s->encoding){
3704             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3705             const int age= s->current_picture.age;
3706
3707             assert(age);
3708
3709             if (s->mb_skipped) {
3710                 s->mb_skipped= 0;
3711                 assert(s->pict_type!=I_TYPE);
3712  
3713                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3714                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3715
3716                 /* if previous was skipped too, then nothing to do !  */
3717                 if (*mbskip_ptr >= age && s->current_picture.reference){
3718                     return;
3719                 }
3720             } else if(!s->current_picture.reference){
3721                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3722                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3723             } else{
3724                 *mbskip_ptr = 0; /* not skipped */
3725             }
3726         }
3727         
3728         dct_linesize = linesize << s->interlaced_dct;
3729         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3730         
3731         if(readable){
3732             dest_y=  s->dest[0];
3733             dest_cb= s->dest[1];
3734             dest_cr= s->dest[2];
3735         }else{
3736             dest_y = s->b_scratchpad;
3737             dest_cb= s->b_scratchpad+16*linesize;
3738             dest_cr= s->b_scratchpad+32*linesize;
3739         }
3740
3741         if (!s->mb_intra) {
3742             /* motion handling */
3743             /* decoding or more than one mb_type (MC was already done otherwise) */
3744             if(!s->encoding){
3745                 if(lowres_flag){
3746                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3747
3748                     if (s->mv_dir & MV_DIR_FORWARD) {
3749                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3750                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3751                     }
3752                     if (s->mv_dir & MV_DIR_BACKWARD) {
3753                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3754                     }
3755                 }else{
3756                     if ((!s->no_rounding) || s->pict_type==B_TYPE){                
3757                         op_pix = s->dsp.put_pixels_tab;
3758                         op_qpix= s->dsp.put_qpel_pixels_tab;
3759                     }else{
3760                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3761                         op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3762                     }
3763                     if (s->mv_dir & MV_DIR_FORWARD) {
3764                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3765                         op_pix = s->dsp.avg_pixels_tab;
3766                         op_qpix= s->dsp.avg_qpel_pixels_tab;
3767                     }
3768                     if (s->mv_dir & MV_DIR_BACKWARD) {
3769                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3770                     }
3771                 }
3772             }
3773
3774             /* skip dequant / idct if we are really late ;) */
3775             if(s->hurry_up>1) return;
3776
3777             /* add dct residue */
3778             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3779                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3780                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3781                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3782                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3783                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3784
3785                 if(!(s->flags&CODEC_FLAG_GRAY)){
3786                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3787                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3788                 }
3789             } else if(s->codec_id != CODEC_ID_WMV2){
3790                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
3791                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
3792                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
3793                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
3794
3795                 if(!(s->flags&CODEC_FLAG_GRAY)){
3796                     if(s->chroma_y_shift){//Chroma420
3797                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3798                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
3799                     }else{
3800                         //chroma422
3801                         dct_linesize = uvlinesize << s->interlaced_dct;
3802                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3803
3804                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
3805                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
3806                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
3807                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
3808                         if(!s->chroma_x_shift){//Chroma444
3809                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
3810                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
3811                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
3812                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
3813                         }
3814                     }
3815                 }//fi gray
3816             }
3817             else{
3818                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
3819             }
3820         } else {
3821             /* dct only in intra block */
3822             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
3823                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3824                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3825                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3826                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3827
3828                 if(!(s->flags&CODEC_FLAG_GRAY)){
3829                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3830                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3831                 }
3832             }else{
3833                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
3834                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
3835                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
3836                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
3837
3838                 if(!(s->flags&CODEC_FLAG_GRAY)){
3839                     if(s->chroma_y_shift){
3840                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
3841                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
3842                     }else{
3843
3844                         dct_linesize = uvlinesize << s->interlaced_dct;
3845                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3846
3847                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
3848                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
3849                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
3850                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
3851                         if(!s->chroma_x_shift){//Chroma444
3852                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
3853                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
3854                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
3855                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
3856                         }
3857                     }
3858                 }//gray
3859             }
3860         }
3861         if(!readable){
3862             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
3863             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
3864             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
3865         }
3866     }
3867 }
3868
3869 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
3870     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
3871     else                  MPV_decode_mb_internal(s, block, 0);
3872 }
3873
3874 #ifdef CONFIG_ENCODERS
3875
3876 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
3877 {
3878     static const char tab[64]=
3879         {3,2,2,1,1,1,1,1,
3880          1,1,1,1,1,1,1,1,
3881          1,1,1,1,1,1,1,1,
3882          0,0,0,0,0,0,0,0,
3883          0,0,0,0,0,0,0,0,
3884          0,0,0,0,0,0,0,0,
3885          0,0,0,0,0,0,0,0,
3886          0,0,0,0,0,0,0,0};
3887     int score=0;
3888     int run=0;
3889     int i;
3890     DCTELEM *block= s->block[n];
3891     const int last_index= s->block_last_index[n];
3892     int skip_dc;
3893
3894     if(threshold<0){
3895         skip_dc=0;
3896         threshold= -threshold;
3897     }else
3898         skip_dc=1;
3899
3900     /* are all which we could set to zero are allready zero? */
3901     if(last_index<=skip_dc - 1) return;
3902
3903     for(i=0; i<=last_index; i++){
3904         const int j = s->intra_scantable.permutated[i];
3905         const int level = ABS(block[j]);
3906         if(level==1){
3907             if(skip_dc && i==0) continue;
3908             score+= tab[run];
3909             run=0;
3910         }else if(level>1){
3911             return;
3912         }else{
3913             run++;
3914         }
3915     }
3916     if(score >= threshold) return;
3917     for(i=skip_dc; i<=last_index; i++){
3918         const int j = s->intra_scantable.permutated[i];
3919         block[j]=0;
3920     }
3921     if(block[0]) s->block_last_index[n]= 0;
3922     else         s->block_last_index[n]= -1;
3923 }
3924
3925 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
3926 {
3927     int i;
3928     const int maxlevel= s->max_qcoeff;
3929     const int minlevel= s->min_qcoeff;
3930     int overflow=0;
3931     
3932     if(s->mb_intra){
3933         i=1; //skip clipping of intra dc
3934     }else
3935         i=0;
3936     
3937     for(;i<=last_index; i++){
3938         const int j= s->intra_scantable.permutated[i];
3939         int level = block[j];
3940        
3941         if     (level>maxlevel){
3942             level=maxlevel;
3943             overflow++;
3944         }else if(level<minlevel){
3945             level=minlevel;
3946             overflow++;
3947         }
3948         
3949         block[j]= level;
3950     }
3951     
3952     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
3953         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
3954 }
3955
3956 #endif //CONFIG_ENCODERS
3957
3958 /**
3959  *
3960  * @param h is the normal height, this will be reduced automatically if needed for the last row
3961  */
3962 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
3963     if (s->avctx->draw_horiz_band) {
3964         AVFrame *src;
3965         int offset[4];
3966         
3967         if(s->picture_structure != PICT_FRAME){
3968             h <<= 1;
3969             y <<= 1;
3970             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
3971         }
3972
3973         h= FFMIN(h, s->avctx->height - y);
3974
3975         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) 
3976             src= (AVFrame*)s->current_picture_ptr;
3977         else if(s->last_picture_ptr)
3978             src= (AVFrame*)s->last_picture_ptr;
3979         else
3980             return;
3981             
3982         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
3983             offset[0]=
3984             offset[1]=
3985             offset[2]=
3986             offset[3]= 0;
3987         }else{
3988             offset[0]= y * s->linesize;;
3989             offset[1]= 
3990             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
3991             offset[3]= 0;
3992         }
3993
3994         emms_c();
3995
3996         s->avctx->draw_horiz_band(s->avctx, src, offset,
3997                                   y, s->picture_structure, h);
3998     }
3999 }
4000
/**
 * Initialize s->block_index[] and s->dest[] for the macroblock at
 * (s->mb_x, s->mb_y). block_index[0..3] address the four luma 8x8
 * blocks, [4]/[5] the chroma blocks; indices start one macroblock to
 * the left so that the per-MB loop can pre-increment them.
 */
void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
    const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
    const int uvlinesize= s->current_picture.linesize[1];
    const int mb_size= 4 - s->avctx->lowres; // log2 of the macroblock size at this lowres level

    /* luma: two 8x8 blocks per MB row, rows 2*mb_y and 2*mb_y+1 */
    s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
    s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
    s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
    s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
    /* chroma planes live after the luma block area (b8_stride*mb_height*2) */
    s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    //block_index is not used by mpeg2, so it is not affected by chroma_format

    /* horizontal offset of the MB to the left of (mb_x, mb_y) */
    s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
    s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
    s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));

    /* vertical offset is skipped for B frames with user draw_horiz_band;
       presumably the band callback path handles rows itself — see callers */
    if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
    {
        s->dest[0] += s->mb_y *   linesize << mb_size;
        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
    }
}
4025
4026 #ifdef CONFIG_ENCODERS
4027
/**
 * Fill weight[0..63] with a local-activity measure for each pixel of an
 * 8x8 block: 36*sqrt(count*sum(v^2) - sum(v)^2)/count over the 3x3
 * neighbourhood clipped to the block borders. Used by quantizer noise
 * shaping.
 */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int x, y;
//FIXME optimize
    for(y=0; y<8; y++){
        const int y0= FFMAX(y-1, 0);
        const int y1= FFMIN(8, y+2);

        for(x=0; x<8; x++){
            const int x0= FFMAX(x-1, 0);
            const int x1= FFMIN(8, x+2);
            int sum=0, sqr=0, count=0;
            int x2, y2;

            for(y2= y0; y2 < y1; y2++){
                for(x2= x0; x2 < x1; x2++){
                    const int v= ptr[x2 + y2*stride];
                    sum  += v;
                    sqr  += v*v;
                    count++;
                }
            }
            weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
        }
    }
}
4050
/**
 * Encode one macroblock: update qscale, fetch/emulate the source pixels,
 * build the 6 DCT blocks (intra pixels or inter residual), quantize,
 * apply coefficient elimination, and finally entropy-code the MB with the
 * codec-specific function.
 * @param motion_x horizontal MV component passed through to the MB coder
 * @param motion_y vertical MV component passed through to the MB coder
 */
static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
    int16_t weight[6][64];     // per-pixel visual weights (noise shaping only)
    DCTELEM orig[6][64];       // pre-quantization copy (noise shaping only)
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    int i;
    int skip_dct[6];           // 1 = block residual small enough to drop
    int dct_offset   = s->linesize*8; //default for progressive frames
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int wrap_y, wrap_c;
    
    for(i=0; i<6; i++) skip_dct[i]=0;
    
    /* per-MB quantizer update (adaptive quant / QP rate distortion) */
    if(s->adaptive_quant){
        const int last_qp= s->qscale;
        const int mb_xy= mb_x + mb_y*s->mb_stride;

        s->lambda= s->lambda_table[mb_xy];
        update_qscale(s);
    
        if(!(s->flags&CODEC_FLAG_QP_RD)){
            s->dquant= s->qscale - last_qp;

            if(s->out_format==FMT_H263){
                /* H.263-family syntax limits the per-MB qscale delta */
                s->dquant= clip(s->dquant, -2, 2); //FIXME RD
            
                if(s->codec_id==CODEC_ID_MPEG4){        
                    if(!s->mb_intra){
                        if(s->pict_type == B_TYPE){
                            /* B frames: force even dquant, no dquant for direct MBs */
                            if(s->dquant&1) 
                                s->dquant= (s->dquant/2)*2;
                            if(s->mv_dir&MV_DIRECT)
                                s->dquant= 0;
                        }
                        /* 4MV macroblocks cannot carry a dquant in MPEG-4 */
                        if(s->mv_type==MV_TYPE_8X8)
                            s->dquant=0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    }else if(s->flags&CODEC_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    /* source pixel pointers for this MB in the picture to be encoded */
    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
    ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
    ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;

    /* MB sticks out of the picture: replicate edges into the emu buffer
       (luma at offset 0, cb/cr packed side by side at 18*wrap_y) */
    if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
        uint8_t *ebuf= s->edge_emu_buffer + 32;
        ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
        ptr_y= ebuf;
        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cb= ebuf+18*wrap_y;
        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cr= ebuf+18*wrap_y+8;
    }

    if (s->mb_intra) {
        /* frame vs field DCT decision: interlaced DCT wins only when it
           scores better by more than the 400-point progressive bias */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8) 
                              +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;

            if(progressive_score > 0){
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8) 
                                  +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;
            
                    /* switch luma addressing to field (every other line) */
                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }
        
        /* intra: copy source pixels straight into the DCT blocks */
        s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
        }
    }else{
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        /* inter: run motion compensation into s->dest first */
        if ((!s->no_rounding) || s->pict_type==B_TYPE){
            op_pix = s->dsp.put_pixels_tab;
            op_qpix= s->dsp.put_qpel_pixels_tab;
        }else{
            op_pix = s->dsp.put_no_rnd_pixels_tab;
            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        if (s->mv_dir & MV_DIR_FORWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
            /* second direction averages on top of the first */
            op_pix = s->dsp.avg_pixels_tab;
            op_qpix= s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
        }

        /* frame vs field DCT decision on the residual (source vs prediction) */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8) 
                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
            
            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;

            if(progressive_score>0){
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8) 
                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
            
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;
            
                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }
        
        /* residual = source - prediction, per 8x8 block */
        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
        
        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
        }
        /* pre quantization: in low-variance MBs, drop blocks whose SAD
           against the prediction is below ~20*qscale */
        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
            //FIXME optimize
            if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
        }
    }

    /* noise shaping needs the visual weights and the unquantized blocks */
    if(s->avctx->quantizer_noise_shaping){
        if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
        if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
        if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
        if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
        /* copies all 6 blocks at once; s->block is assumed contiguous */
        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
    }
            
    /* DCT & quantize */
    assert(s->out_format!=FMT_MJPEG || s->qscale==8);
    {
        for(i=0;i<6;i++) {
            if(!skip_dct[i]){
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
            // FIXME we could decide to change to quantizer instead of clipping
            // JS: I don't think that would be a good idea it could lower quality instead
            //     of improve it. Just INTRADC clipping deserves changes in quantizer
                if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
            }else
                s->block_last_index[i]= -1;
        }
        if(s->avctx->quantizer_noise_shaping){
            for(i=0;i<6;i++) {
                if(!skip_dct[i]){
                    s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
                }
            }
        }
        
        /* drop nearly-empty inter blocks (see dct_single_coeff_elimination) */
        if(s->luma_elim_threshold && !s->mb_intra)
            for(i=0; i<4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if(s->chroma_elim_threshold && !s->mb_intra)
            for(i=4; i<6; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if(s->flags & CODEC_FLAG_CBP_RD){
            for(i=0;i<6;i++) {
                if(s->block_last_index[i] == -1)
                    s->coded_score[i]= INT_MAX/256;
            }
        }
    }

    /* gray-only intra MBs: force mid-gray chroma DC */
    if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
        s->block_last_index[4]=
        s->block_last_index[5]= 0;
        s->block[4][0]=
        s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
    }

    //non c quantize code returns incorrect block_last_index FIXME
    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
        for(i=0; i<6; i++){
            int j;
            if(s->block_last_index[i]>0){
                for(j=63; j>0; j--){
                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
                }
                s->block_last_index[i]= j;
            }
        }
    }

    /* huffman encode */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case CODEC_ID_MPEG1VIDEO:
    case CODEC_ID_MPEG2VIDEO:
        mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MPEG4:
        mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MSMPEG4V2:
    case CODEC_ID_MSMPEG4V3:
    case CODEC_ID_WMV1:
        msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_WMV2:
         ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
#ifdef CONFIG_H261_ENCODER
    case CODEC_ID_H261:
        ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
#endif
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
    case CODEC_ID_RV10:
    case CODEC_ID_RV20:
        h263_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MJPEG:
        mjpeg_encode_mb(s, s->block); break;
    default:
        assert(0);
    }
}
4312
4313 #endif //CONFIG_ENCODERS
4314
4315 void ff_mpeg_flush(AVCodecContext *avctx){
4316     int i;
4317     MpegEncContext *s = avctx->priv_data;
4318     
4319     if(s==NULL || s->picture==NULL) 
4320         return;
4321     
4322     for(i=0; i<MAX_PICTURE_COUNT; i++){
4323        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4324                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4325         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4326     }
4327     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4328     
4329     s->mb_x= s->mb_y= 0;
4330     
4331     s->parse_context.state= -1;
4332     s->parse_context.frame_start_found= 0;
4333     s->parse_context.overread= 0;
4334     s->parse_context.overread_index= 0;
4335     s->parse_context.index= 0;
4336     s->parse_context.last_index= 0;
4337     s->bitstream_buffer_size=0;
4338 }
4339
4340 #ifdef CONFIG_ENCODERS
4341 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4342 {
4343     const uint16_t *srcw= (uint16_t*)src;
4344     int words= length>>4;
4345     int bits= length&15;
4346     int i;
4347
4348     if(length==0) return;
4349     
4350     if(words < 16){
4351         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4352     }else if(put_bits_count(pb)&7){
4353         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4354     }else{
4355         for(i=0; put_bits_count(pb)&31; i++)
4356             put_bits(pb, 8, src[i]);
4357         flush_put_bits(pb);
4358         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4359         skip_put_bytes(pb, 2*words-i);
4360     }
4361         
4362     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4363 }
4364
4365 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4366     int i;
4367
4368     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4369
4370     /* mpeg1 */
4371     d->mb_skip_run= s->mb_skip_run;
4372     for(i=0; i<3; i++)
4373         d->last_dc[i]= s->last_dc[i];
4374     
4375     /* statistics */
4376     d->mv_bits= s->mv_bits;
4377     d->i_tex_bits= s->i_tex_bits;
4378     d->p_tex_bits= s->p_tex_bits;
4379     d->i_count= s->i_count;
4380     d->f_count= s->f_count;
4381     d->b_count= s->b_count;
4382     d->skip_count= s->skip_count;
4383     d->misc_bits= s->misc_bits;
4384     d->last_bits= 0;
4385
4386     d->mb_skipped= 0;
4387     d->qscale= s->qscale;
4388     d->dquant= s->dquant;
4389 }
4390
4391 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4392     int i;
4393
4394     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
4395     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
4396     
4397     /* mpeg1 */
4398     d->mb_skip_run= s->mb_skip_run;
4399     for(i=0; i<3; i++)
4400         d->last_dc[i]= s->last_dc[i];
4401     
4402     /* statistics */
4403     d->mv_bits= s->mv_bits;
4404     d->i_tex_bits= s->i_tex_bits;
4405     d->p_tex_bits= s->p_tex_bits;
4406     d->i_count= s->i_count;
4407     d->f_count= s->f_count;
4408     d->b_count= s->b_count;
4409     d->skip_count= s->skip_count;
4410     d->misc_bits= s->misc_bits;
4411
4412     d->mb_intra= s->mb_intra;
4413     d->mb_skipped= s->mb_skipped;
4414     d->mv_type= s->mv_type;
4415     d->mv_dir= s->mv_dir;
4416     d->pb= s->pb;
4417     if(s->data_partitioning){
4418         d->pb2= s->pb2;
4419         d->tex_pb= s->tex_pb;
4420     }
4421     d->block= s->block;
4422     for(i=0; i<6; i++)
4423         d->block_last_index[i]= s->block_last_index[i];
4424     d->interlaced_dct= s->interlaced_dct;
4425     d->qscale= s->qscale;
4426 }
4427
/**
 * Trial-encode the current macroblock as 'type' into one of two candidate
 * bitstream/block/reconstruction buffers (selected by *next_block) and,
 * if its cost (bits, or lambda2*bits + SSE in RD mode) beats *dmin,
 * record it as the new best and flip *next_block so the next trial uses
 * the other buffer.
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int score;
    uint8_t *dest_backup[3];
    
    /* restore the pre-trial encoder state */
    copy_context_before_encode(s, backup, type);

    /* route blocks and bitstream writers to the candidate buffer */
    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }
    
    /* candidate 1 reconstructs into the scratchpad instead of the picture */
    if(*next_block){
        memcpy(dest_backup, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);
    
    /* cost in bits written so far across all partitions */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }
   
    /* full RD: decode the trial MB and add the distortion term */
    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }
    
    if(*next_block){
        memcpy(s->dest, dest_backup, sizeof(s->dest));
    }

    if(score<*dmin){
        *dmin= score;
        *next_block^=1;

        copy_context_after_encode(best, s, type);
    }
}
4478                 
4479 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4480     uint32_t *sq = squareTbl + 256;
4481     int acc=0;
4482     int x,y;
4483     
4484     if(w==16 && h==16) 
4485         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4486     else if(w==8 && h==8)
4487         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4488     
4489     for(y=0; y<h; y++){
4490         for(x=0; x<w; x++){
4491             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4492         } 
4493     }
4494     
4495     assert(acc>=0);
4496     
4497     return acc;
4498 }
4499
4500 static int sse_mb(MpegEncContext *s){
4501     int w= 16;
4502     int h= 16;
4503
4504     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4505     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4506
4507     if(w==16 && h==16)
4508       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4509         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4510                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4511                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4512       }else{
4513         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4514                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4515                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4516       }
4517     else
4518         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4519                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4520                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4521 }
4522
4523 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4524     MpegEncContext *s= arg;
4525
4526     
4527     s->me.pre_pass=1;
4528     s->me.dia_size= s->avctx->pre_dia_size;
4529     s->first_slice_line=1;
4530     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4531         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4532             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4533         }
4534         s->first_slice_line=0;
4535     }
4536     
4537     s->me.pre_pass=0;
4538     
4539     return 0;
4540 }
4541
4542 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4543     MpegEncContext *s= arg;
4544
4545     s->me.dia_size= s->avctx->dia_size;
4546     s->first_slice_line=1;
4547     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4548         s->mb_x=0; //for block init below
4549         ff_init_block_index(s);
4550         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4551             s->block_index[0]+=2;
4552             s->block_index[1]+=2;
4553             s->block_index[2]+=2;
4554             s->block_index[3]+=2;
4555             
4556             /* compute motion vector & mb_type and store in context */
4557             if(s->pict_type==B_TYPE)
4558                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4559             else
4560                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4561         }
4562         s->first_slice_line=0;
4563     }
4564     return 0;
4565 }
4566
4567 static int mb_var_thread(AVCodecContext *c, void *arg){
4568     MpegEncContext *s= arg;
4569     int mb_x, mb_y;
4570
4571     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4572         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4573             int xx = mb_x * 16;
4574             int yy = mb_y * 16;
4575             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4576             int varc;
4577             int sum = s->dsp.pix_sum(pix, s->linesize);
4578     
4579             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4580
4581             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4582             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4583             s->me.mb_var_sum_temp    += varc;
4584         }
4585     }
4586     return 0;
4587 }
4588
4589 static void write_slice_end(MpegEncContext *s){
4590     if(s->codec_id==CODEC_ID_MPEG4){
4591         if(s->partitioned_frame){
4592             ff_mpeg4_merge_partitions(s);
4593         }
4594     
4595         ff_mpeg4_stuffing(&s->pb);
4596     }else if(s->out_format == FMT_MJPEG){
4597         ff_mjpeg_stuffing(&s->pb);
4598     }
4599
4600     align_put_bits(&s->pb);
4601     flush_put_bits(&s->pb);
4602     
4603     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4604         s->misc_bits+= get_bits_diff(s);
4605 }
4606
/**
 * Slice-thread entry point that encodes all macroblocks of the slice
 * [start_mb_y, end_mb_y) into the context's bitstream writer(s).
 * Handles resync/GOB/video-packet headers (rtp_mode), per-MB mode
 * decision (trellis over candidate MB types and, with CODEC_FLAG_QP_RD,
 * over qscale), PSNR error accumulation and the in-loop filter.
 * @return 0 on success, -1 if the output buffer would overflow
 */
static int encode_thread(AVCodecContext *c, void *arg){
    MpegEncContext *s= arg;
    int mb_x, mb_y, pdif = 0;
    int i, j;
    MpegEncContext best_s, backup_s;
    /* double-buffered scratch bitstreams for trying out MB encodings;
       index next_block flips between the "candidate" and "best so far" */
    uint8_t bit_buf[2][MAX_MB_BYTES];
    uint8_t bit_buf2[2][MAX_MB_BYTES];
    uint8_t bit_buf_tex[2][MAX_MB_BYTES];
    PutBitContext pb[2], pb2[2], tex_pb[2];
//printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);

    for(i=0; i<2; i++){
        init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
        init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
        init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
    }

    /* reset the per-slice statistics counters */
    s->last_bits= put_bits_count(&s->pb);
    s->mv_bits=0;
    s->misc_bits=0;
    s->i_tex_bits=0;
    s->p_tex_bits=0;
    s->i_count=0;
    s->f_count=0;
    s->b_count=0;
    s->skip_count=0;

    for(i=0; i<3; i++){
        /* init last dc values */
        /* note: quant matrix value (8) is implied here */
        s->last_dc[i] = 128 << s->intra_dc_precision;

        s->current_picture_ptr->error[i] = 0;
    }
    s->mb_skip_run = 0;
    memset(s->last_mv, 0, sizeof(s->last_mv));

    s->last_mv_dir = 0;

    /* codec specific per-slice setup */
    switch(s->codec_id){
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
        s->gob_index = ff_h263_get_gob_height(s);
        break;
    case CODEC_ID_MPEG4:
        if(s->partitioned_frame)
            ff_mpeg4_init_partitions(s);
        break;
    }

    s->resync_mb_x=0;
    s->resync_mb_y=0;
    s->first_slice_line = 1;
    s->ptr_lastgob = s->pb.buf;
    for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
//    printf("row %d at %X\n", s->mb_y, (int)s);
        s->mb_x=0;
        s->mb_y= mb_y;

        ff_set_qscale(s, s->qscale);
        ff_init_block_index(s);

        for(mb_x=0; mb_x < s->mb_width; mb_x++) {
            int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
            int mb_type= s->mb_type[xy];
//            int d;
            int dmin= INT_MAX;
            int dir;

            /* bail out early if the worst-case MB would not fit the buffer */
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
                return -1;
            }
            if(s->data_partitioning){
                if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
                   || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
                    av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
                    return -1;
                }
            }

            s->mb_x = mb_x;
            s->mb_y = mb_y;  // moved into loop, can get changed by H.261
            ff_update_block_index(s);

#ifdef CONFIG_H261_ENCODER
            if(s->codec_id == CODEC_ID_H261){
                /* H.261 encodes MBs in a reordered scan; refresh xy/mb_type */
                ff_h261_reorder_mb_index(s);
                xy= s->mb_y*s->mb_stride + s->mb_x;
                mb_type= s->mb_type[xy];
            }
#endif

            /* write gob / video packet header  */
            if(s->rtp_mode){
                int current_packet_size, is_gob_start;

                current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);

                /* start a new packet once the payload size target is reached */
                is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;

                if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;

                switch(s->codec_id){
                case CODEC_ID_H263:
                case CODEC_ID_H263P:
                    if(!s->h263_slice_structured)
                        if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
                    break;
                case CODEC_ID_MPEG2VIDEO:
                    if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
                    /* fall through: MPEG-2 shares the skip-run check below */
                case CODEC_ID_MPEG1VIDEO:
                    if(s->mb_skip_run) is_gob_start=0;
                    break;
                }

                if(is_gob_start){
                    if(s->start_mb_y != mb_y || mb_x!=0){
                        write_slice_end(s);

                        if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
                            ff_mpeg4_init_partitions(s);
                        }
                    }

                    assert((put_bits_count(&s->pb)&7) == 0);
                    current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;

                    /* deliberate error injection for robustness testing */
                    if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
                        int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
                        int d= 100 / s->avctx->error_rate;
                        if(r % d == 0){
                            current_packet_size=0;
#ifndef ALT_BITSTREAM_WRITER
                            s->pb.buf_ptr= s->ptr_lastgob;
#endif
                            assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
                        }
                    }

                    if (s->avctx->rtp_callback){
                        int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
                        s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
                    }

                    /* write the codec specific resync/slice/GOB header */
                    switch(s->codec_id){
                    case CODEC_ID_MPEG4:
                        ff_mpeg4_encode_video_packet_header(s);
                        ff_mpeg4_clean_buffers(s);
                    break;
                    case CODEC_ID_MPEG1VIDEO:
                    case CODEC_ID_MPEG2VIDEO:
                        ff_mpeg1_encode_slice_header(s);
                        ff_mpeg1_clean_buffers(s);
                    break;
                    case CODEC_ID_H263:
                    case CODEC_ID_H263P:
                        h263_encode_gob_header(s, mb_y);
                    break;
                    }

                    if(s->flags&CODEC_FLAG_PASS1){
                        int bits= put_bits_count(&s->pb);
                        s->misc_bits+= bits - s->last_bits;
                        s->last_bits= bits;
                    }

                    s->ptr_lastgob += current_packet_size;
                    s->first_slice_line=1;
                    s->resync_mb_x=mb_x;
                    s->resync_mb_y=mb_y;
                }
            }

            if(  (s->resync_mb_x   == s->mb_x)
               && s->resync_mb_y+1 == s->mb_y){
                s->first_slice_line=0;
            }

            s->mb_skipped=0;
            s->dquant=0; //only for QP_RD

            if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
                int next_block=0;
                int pb_bits_count, pb2_bits_count, tex_pb_bits_count;

                /* try each candidate MB type; encode_mb_hq() keeps the one
                   with the lowest rate-distortion score in best_s */
                copy_context_before_encode(&backup_s, s, -1);
                backup_s.pb= s->pb;
                best_s.data_partitioning= s->data_partitioning;
                best_s.partitioned_frame= s->partitioned_frame;
                if(s->data_partitioning){
                    backup_s.pb2= s->pb2;
                    backup_s.tex_pb= s->tex_pb;
                }

                if(mb_type&CANDIDATE_MB_TYPE_INTER){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = s->p_mv_table[xy][0];
                    s->mv[0][0][1] = s->p_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                }
                if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = 0;
                    s->mv[0][0][1] = 0;
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                }
                if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_8X8;
                    s->mb_intra= 0;
                    for(i=0; i<4; i++){
                        s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
                        s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                }
                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[1][0][0] = s->b_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_back_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
                                 &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
                }
                if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
                    int mx= s->b_direct_mv_table[xy][0];
                    int my= s->b_direct_mv_table[xy][1];

                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
                    s->mb_intra= 0;
                    ff_mpeg4_set_direct_mv(s, mx, my);
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
                                 &dmin, &next_block, mx, my);
                }
                if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(dir=0; dir<2; dir++){
                        for(i=0; i<2; i++){
                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
                        }
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_INTRA){
                    s->mv_dir = 0;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 1;
                    s->mv[0][0][0] = 0;
                    s->mv[0][0][1] = 0;
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                    if(s->h263_pred || s->h263_aic){
                        if(best_s.mb_intra)
                            s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
                        else
                            ff_clean_intra_table_entries(s); //old mode?
                    }
                }

                /* quantizer rate-distortion: additionally search over dquant */
                if(s->flags & CODEC_FLAG_QP_RD){
                    if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
                        const int last_qp= backup_s.qscale;
                        int dquant, dir, qp, dc[6];
                        DCTELEM ac[6][16];
                        const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;

                        assert(backup_s.dquant == 0);

                        //FIXME intra
                        s->mv_dir= best_s.mv_dir;
                        s->mv_type = MV_TYPE_16X16;
                        s->mb_intra= best_s.mb_intra;
                        s->mv[0][0][0] = best_s.mv[0][0][0];
                        s->mv[0][0][1] = best_s.mv[0][0][1];
                        s->mv[1][0][0] = best_s.mv[1][0][0];
                        s->mv[1][0][1] = best_s.mv[1][0][1];

                        dir= s->pict_type == B_TYPE ? 2 : 1;
                        if(last_qp + dir > s->avctx->qmax) dir= -dir;
                        for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
                            qp= last_qp + dquant;
                            if(qp < s->avctx->qmin || qp > s->avctx->qmax)
                                break;
                            backup_s.dquant= dquant;
                            /* save the DC/AC prediction state so it can be
                               restored if this qp is rejected */
                            if(s->mb_intra && s->dc_val[0]){
                                for(i=0; i<6; i++){
                                    dc[i]= s->dc_val[0][ s->block_index[i] ];
                                    memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
                                }
                            }

                            encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
                                         &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
                            if(best_s.qscale != qp){
                                if(s->mb_intra && s->dc_val[0]){
                                    for(i=0; i<6; i++){
                                        s->dc_val[0][ s->block_index[i] ]= dc[i];
                                        memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
                                    }
                                }
                                if(dir > 0 && dquant==dir){
                                    dquant= 0;
                                    dir= -dir;
                                }else
                                    break;
                            }
                        }
                        qp= best_s.qscale;
                        s->current_picture.qscale_table[xy]= qp;
                    }
                }

                /* adopt the winning candidate and copy its bits into the
                   real output bitstream(s) */
                copy_context_after_encode(s, &best_s, -1);

                pb_bits_count= put_bits_count(&s->pb);
                flush_put_bits(&s->pb);
                ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
                s->pb= backup_s.pb;

                if(s->data_partitioning){
                    pb2_bits_count= put_bits_count(&s->pb2);
                    flush_put_bits(&s->pb2);
                    ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
                    s->pb2= backup_s.pb2;

                    tex_pb_bits_count= put_bits_count(&s->tex_pb);
                    flush_put_bits(&s->tex_pb);
                    ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
                    s->tex_pb= backup_s.tex_pb;
                }
                s->last_bits= put_bits_count(&s->pb);

                if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
                    ff_h263_update_motion_val(s);

                if(next_block==0){ //FIXME 16 vs linesize16
                    s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
                    s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
                    s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
                }

                if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
                    MPV_decode_mb(s, s->block);
            } else {
                int motion_x, motion_y;
                s->mv_type=MV_TYPE_16X16;
                // only one MB-Type possible

                switch(mb_type){
                case CANDIDATE_MB_TYPE_INTRA:
                    s->mv_dir = 0;
                    s->mb_intra= 1;
                    motion_x= s->mv[0][0][0] = 0;
                    motion_y= s->mv[0][0][1] = 0;
                    break;
                case CANDIDATE_MB_TYPE_INTER:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mb_intra= 0;
                    motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
                    motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
                    break;
                case CANDIDATE_MB_TYPE_INTER_I:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
                    }
                    motion_x = motion_y = 0;
                    break;
                case CANDIDATE_MB_TYPE_INTER4V:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_8X8;
                    s->mb_intra= 0;
                    for(i=0; i<4; i++){
                        s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
                        s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
                    }
                    motion_x= motion_y= 0;
                    break;
                case CANDIDATE_MB_TYPE_DIRECT:
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
                    s->mb_intra= 0;
                    motion_x=s->b_direct_mv_table[xy][0];
                    motion_y=s->b_direct_mv_table[xy][1];
                    ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
                    break;
                case CANDIDATE_MB_TYPE_BIDIR:
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mb_intra= 0;
                    motion_x=0;
                    motion_y=0;
                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
                    break;
                case CANDIDATE_MB_TYPE_BACKWARD:
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mb_intra= 0;
                    motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
                    motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
                    break;
                case CANDIDATE_MB_TYPE_FORWARD:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mb_intra= 0;
                    motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
                    motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
//                    printf(" %d %d ", motion_x, motion_y);
                    break;
                case CANDIDATE_MB_TYPE_FORWARD_I:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
                    }
                    motion_x=motion_y=0;
                    break;
                case CANDIDATE_MB_TYPE_BACKWARD_I:
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
                    }
                    motion_x=motion_y=0;
                    break;
                case CANDIDATE_MB_TYPE_BIDIR_I:
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(dir=0; dir<2; dir++){
                        for(i=0; i<2; i++){
                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
                        }
                    }
                    motion_x=motion_y=0;
                    break;
                default:
                    motion_x=motion_y=0; //gcc warning fix
                    av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
                }

                encode_mb(s, motion_x, motion_y);

                // RAL: Update last macroblock type
                s->last_mv_dir = s->mv_dir;

                if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
                    ff_h263_update_motion_val(s);

                MPV_decode_mb(s, s->block);
            }

            /* clean the MV table in IPS frames for direct mode in B frames */
            if(s->mb_intra /* && I,P,S_TYPE */){
                s->p_mv_table[xy][0]=0;
                s->p_mv_table[xy][1]=0;
            }

            if(s->flags&CODEC_FLAG_PSNR){
                int w= 16;
                int h= 16;

                if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
                if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;

                s->current_picture_ptr->error[0] += sse(
                    s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                    s->dest[0], w, h, s->linesize);
                s->current_picture_ptr->error[1] += sse(
                    s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                    s->dest[1], w>>1, h>>1, s->uvlinesize);
                s->current_picture_ptr->error[2] += sse(
                    s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                    s->dest[2], w>>1, h>>1, s->uvlinesize);
            }
            if(s->loop_filter){
                if(s->out_format == FMT_H263)
                    ff_h263_loop_filter(s);
            }
//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
        }
    }

    //not beautiful here but we must write it before flushing so it has to be here
    if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
        msmpeg4_encode_ext_header(s);

    write_slice_end(s);

    /* Send the last GOB if RTP */
    if (s->avctx->rtp_callback) {
        int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
        pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
        /* Call the RTP callback to send the last GOB */
        emms_c();
        s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
    }

    return 0;
}
5189
5190 #define MERGE(field) dst->field += src->field; src->field=0
5191 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5192     MERGE(me.scene_change_score);
5193     MERGE(me.mc_mb_var_sum_temp);
5194     MERGE(me.mb_var_sum_temp);
5195 }
5196
/**
 * Merge the bit-count statistics and the bitstream written by a
 * slice-thread context back into the main encoder context.
 *
 * Both bit buffers must be byte-aligned when this is called (asserted
 * below); the source buffer is appended to the destination with
 * ff_copy_bits().
 *
 * @param dst main context receiving statistics and bits
 * @param src thread context; its counters are zeroed by MERGE()
 */
static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
    int i;

    MERGE(dct_count[0]); //note, the other dct vars are not part of the context
    MERGE(dct_count[1]);
    MERGE(mv_bits);
    MERGE(i_tex_bits);
    MERGE(p_tex_bits);
    MERGE(i_count);
    MERGE(f_count);
    MERGE(b_count);
    MERGE(skip_count);
    MERGE(misc_bits);
    MERGE(error_count);
    MERGE(padding_bug_score);

    /* the denoise accumulators are only maintained when noise reduction
       is enabled, so merge them conditionally */
    if(dst->avctx->noise_reduction){
        for(i=0; i<64; i++){
            MERGE(dct_error_sum[0][i]);
            MERGE(dct_error_sum[1][i]);
        }
    }
    
    /* concatenation below only works on whole bytes */
    assert(put_bits_count(&src->pb) % 8 ==0);
    assert(put_bits_count(&dst->pb) % 8 ==0);
    ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
    flush_put_bits(&dst->pb);
}
5225
/**
 * Encode one picture: run motion estimation (possibly multi-threaded),
 * apply scene-change detection, pick f_code/b_code, run rate control,
 * write the codec-specific picture header and finally encode all
 * macroblocks via encode_thread().
 *
 * @param s              encoder context
 * @param picture_number display/coding number stored in the context and
 *                       passed to the header writers
 */
static void encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;

    s->picture_number = picture_number;
    
    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
        
    s->me.scene_change_score=0;
    
//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration
    
    /* rounding control: MSMPEG4v3+ always uses no_rounding on I frames,
       the other listed codecs toggle it on every non-B frame */
    if(s->pict_type==I_TYPE){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=B_TYPE){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;          
    }
    
    s->mb_intra=0; //for the rate distortion & bit compare functions
    /* clone the main context into each slice-thread context */
    for(i=1; i<s->avctx->thread_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    ff_init_me(s);

    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
            /* optional pre-pass motion estimation */
            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    }else /* if(s->pict_type == I_TYPE) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
        
        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
        }
    }
    /* collect per-thread ME statistics into the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* promote a P frame to an I frame if ME reported a scene change */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* pick f_code/b_code large enough for the estimated vectors, then
       clip vectors that still do not fit the chosen range */
    if(!s->umvplus){
        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
            }
                    
            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j, 
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==B_TYPE){
            int a, b;

            /* forward (f_code) and backward (b_code) ranges are chosen
               independently over the direct and bidirectional tables */
            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) 
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, 
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    /* rate control: pick the picture quality unless the user fixed it */
    if (!s->fixed_qscale) 
        s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr

    if(s->adaptive_quant){
        switch(s->codec_id){
        case CODEC_ID_MPEG4:
            ff_clean_mpeg4_qscales(s);
            break;
        case CODEC_ID_H263:
        case CODEC_ID_H263P:
        case CODEC_ID_FLV1:
            ff_clean_h263_qscales(s);
            break;
        }

        s->lambda= s->lambda_table[0];
        //FIXME broken
    }else
        s->lambda= s->current_picture.quality;
//printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
    update_qscale(s);
    
    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) 
        s->qscale= 3; //reduce clipping problems
        
    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
        }
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }
    
    //FIXME var duplication
    s->current_picture_ptr->key_frame=
    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
    s->current_picture_ptr->pict_type=
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    /* write the codec-specific picture header; header_bits is measured
       as the put_bits delta around the call */
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        mjpeg_picture_header(s);
        break;
#ifdef CONFIG_H261_ENCODER
    case FMT_H261:
        ff_h261_encode_picture_header(s, picture_number);
        break;
#endif
    case FMT_H263:
        if (s->codec_id == CODEC_ID_WMV2) 
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (s->h263_msmpeg4) 
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
#ifdef CONFIG_RV10_ENCODER
        else if (s->codec_id == CODEC_ID_RV10) 
            rv10_encode_picture_header(s, picture_number);
#endif
#ifdef CONFIG_RV20_ENCODER
        else if (s->codec_id == CODEC_ID_RV20) 
            rv20_encode_picture_header(s, picture_number);
#endif
        else if (s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;
        
    /* encode the macroblocks on all threads, then merge the resulting
       bitstreams and statistics back into the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
}
5447
5448 #endif //CONFIG_ENCODERS
5449
5450 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5451     const int intra= s->mb_intra;
5452     int i;
5453
5454     s->dct_count[intra]++;
5455
5456     for(i=0; i<64; i++){
5457         int level= block[i];
5458
5459         if(level){
5460             if(level>0){
5461                 s->dct_error_sum[intra][i] += level;
5462                 level -= s->dct_offset[intra][i];
5463                 if(level<0) level=0;
5464             }else{
5465                 s->dct_error_sum[intra][i] -= level;
5466                 level += s->dct_offset[intra][i];
5467                 if(level>0) level=0;
5468             }
5469             block[i]= level;
5470         }
5471     }
5472 }
5473
5474 #ifdef CONFIG_ENCODERS
5475
/**
 * Rate-distortion optimal (trellis) quantization of one 8x8 block.
 *
 * Runs the forward DCT (and optional denoising), builds up to two
 * candidate levels per scan position, then performs a dynamic-programming
 * search over run/level sequences minimizing distortion + lambda*bits,
 * and finally writes the winning levels back into block[] in permuted
 * scan order.
 *
 * @param s        encoder context (quant matrices, VLC length tables, ...)
 * @param block    spatial-domain samples in, quantized coefficients out
 * @param n        block index; n < 4 selects the luma DC scale
 * @param qscale   quantizer scale
 * @param overflow set to 1 if a level exceeded s->max_qcoeff
 * @return index of the last nonzero coefficient, or -1/0 sentinels as below
 */
static int dct_quantize_trellis_c(MpegEncContext *s, 
                        DCTELEM *block, int n,
                        int qscale, int *overflow){
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    int run_tab[65];
    int level_tab[65];
    int score_tab[65];          /* best score for a path ending before position i */
    int survivor[65];           /* scan positions still worth extending */
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;
    int last_i;
    int coeff[2][64];           /* up to two candidate levels per position */
    int coeff_count[64];
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
        
    s->dsp.fdct (block);
    
    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    /* intra blocks quantize DC separately and start the AC scan at 1 */
    if (s->mb_intra) {
        int q;
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }
            
        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    /* find the last scan position whose coefficient quantizes to nonzero */
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    /* build candidate levels: |level| and |level|-1 for positions above
       the threshold, otherwise +-1 (sign taken from the coefficient) */
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            coeff_count[i]= FFMIN(level, 2);
            assert(coeff_count[i]);
            max |=level;
        }else{
            coeff[0][i]= (level>>31)|1;
            coeff_count[i]= 1;
        }
    }
    
    *overflow= s->max_qcoeff < max; //overflow might have happened
    
    /* nothing survived quantization: clear the AC part and bail out */
    if(last_non_zero < start_i){
        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
        return last_non_zero;
    }

    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;
    
    /* dynamic programming over scan positions: for each candidate level
       try extending every surviving path, tracking best continuation
       (run_tab/level_tab) and best termination (last_*) separately */
    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j;
        const int dct_coeff= ABS(block[ scantable[i] ]);
        const int zero_distoration= dct_coeff*dct_coeff;
        int best_score=256*256*256*120;
        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distoration;
            int level= coeff[level_index][i];
            const int alevel= ABS(level);
            int unquant_coeff;
            
            assert(level);

            /* reconstruct the dequantized value exactly as the decoder would */
            if(s->out_format == FMT_H263){
                unquant_coeff= alevel*qmul + qadd;
            }else{ //MPEG1
                j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
            level+=64;
            if((level&(~127)) == 0){
                /* level fits the VLC tables: use exact code lengths */
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];
                    
                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                /* level out of VLC range: account for the escape code cost */
                distoration += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + score_tab[i-run];
                    
                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }
        
        score_tab[i+1]= best_score;

        /* prune survivors that can no longer beat the best path */
        //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    /* for non-H263 formats the last-coefficient cost was not tracked in
       the loop; choose the best termination point here */
    if(s->out_format != FMT_H263){
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;
    
    dc= ABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
    
    if(last_non_zero < start_i)
        return last_non_zero;

    /* inter block reduced to a single coefficient at position 0:
       re-evaluate whether keeping it is cheaper than dropping the block */
    if(last_non_zero == 0 && start_i == 0){
        int best_level= 0;
        int best_score= dc * dc;
        
        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= ABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    /* backtrack the winning path, writing levels at permuted positions */
    i= last_i;
    assert(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;
    
    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
5763
5764 //#define REFINE_STATS 1
5765 static int16_t basis[64][64];
5766
5767 static void build_basis(uint8_t *perm){
5768     int i, j, x, y;
5769     emms_c();
5770     for(i=0; i<8; i++){
5771         for(j=0; j<8; j++){
5772             for(y=0; y<8; y++){
5773                 for(x=0; x<8; x++){
5774                     double s= 0.25*(1<<BASIS_SHIFT);
5775                     int index= 8*i + j;
5776                     int perm_index= perm[index];
5777                     if(i==0) s*= sqrt(0.5);
5778                     if(j==0) s*= sqrt(0.5);
5779                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5780                 }
5781             }
5782         }
5783     }
5784 }
5785
5786 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5787                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5788                         int n, int qscale){
5789     int16_t rem[64];
5790     DCTELEM d1[64] __align16;
5791     const int *qmat;
5792     const uint8_t *scantable= s->intra_scantable.scantable;
5793     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5794 //    unsigned int threshold1, threshold2;
5795 //    int bias=0;
5796     int run_tab[65];
5797     int prev_run=0;
5798     int prev_level=0;
5799     int qmul, qadd, start_i, last_non_zero, i, dc;
5800     uint8_t * length;
5801     uint8_t * last_length;
5802     int lambda;
5803     int rle_index, run, q, sum;
5804 #ifdef REFINE_STATS
5805 static int count=0;
5806 static int after_last=0;
5807 static int to_zero=0;
5808 static int from_zero=0;
5809 static int raise=0;
5810 static int lower=0;
5811 static int messed_sign=0;
5812 #endif
5813
5814     if(basis[0][0] == 0)
5815         build_basis(s->dsp.idct_permutation);
5816     
5817     qmul= qscale*2;
5818     qadd= (qscale-1)|1;
5819     if (s->mb_intra) {
5820         if (!s->h263_aic) {
5821             if (n < 4)
5822                 q = s->y_dc_scale;
5823             else
5824                 q = s->c_dc_scale;
5825         } else{
5826             /* For AIC we skip quant/dequant of INTRADC */
5827             q = 1;
5828             qadd=0;
5829         }
5830         q <<= RECON_SHIFT-3;
5831         /* note: block[0] is assumed to be positive */
5832         dc= block[0]*q;
5833 //        block[0] = (block[0] + (q >> 1)) / q;
5834         start_i = 1;
5835         qmat = s->q_intra_matrix[qscale];
5836 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5837 //            bias= 1<<(QMAT_SHIFT-1);
5838         length     = s->intra_ac_vlc_length;
5839         last_length= s->intra_ac_vlc_last_length;
5840     } else {
5841         dc= 0;
5842         start_i = 0;
5843         qmat = s->q_inter_matrix[qscale];
5844         length     = s->inter_ac_vlc_length;
5845         last_length= s->inter_ac_vlc_last_length;
5846     }
5847     last_non_zero = s->block_last_index[n];
5848
5849 #ifdef REFINE_STATS
5850 {START_TIMER
5851 #endif
5852     dc += (1<<(RECON_SHIFT-1));
5853     for(i=0; i<64; i++){
5854         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
5855     }
5856 #ifdef REFINE_STATS
5857 STOP_TIMER("memset rem[]")}
5858 #endif
5859     sum=0;
5860     for(i=0; i<64; i++){
5861         int one= 36;
5862         int qns=4;
5863         int w;
5864
5865         w= ABS(weight[i]) + qns*one;
5866         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
5867
5868         weight[i] = w;
5869 //        w=weight[i] = (63*qns + (w/2)) / w;
5870          
5871         assert(w>0);
5872         assert(w<(1<<6));
5873         sum += w*w;
5874     }
5875     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
5876 #ifdef REFINE_STATS
5877 {START_TIMER
5878 #endif
5879     run=0;
5880     rle_index=0;
5881     for(i=start_i; i<=last_non_zero; i++){
5882         int j= perm_scantable[i];
5883         const int level= block[j];
5884         int coeff;
5885         
5886         if(level){
5887             if(level<0) coeff= qmul*level - qadd;
5888             else        coeff= qmul*level + qadd;
5889             run_tab[rle_index++]=run;
5890             run=0;
5891
5892             s->dsp.add_8x8basis(rem, basis[j], coeff);
5893         }else{
5894             run++;
5895         }
5896     }
5897 #ifdef REFINE_STATS
5898 if(last_non_zero>0){
5899 STOP_TIMER("init rem[]")
5900 }
5901 }
5902
5903 {START_TIMER
5904 #endif
5905     for(;;){
5906         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
5907         int best_coeff=0;
5908         int best_change=0;
5909         int run2, best_unquant_change=0, analyze_gradient;
5910 #ifdef REFINE_STATS
5911 {START_TIMER
5912 #endif
5913         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
5914
5915         if(analyze_gradient){
5916 #ifdef REFINE_STATS
5917 {START_TIMER
5918 #endif
5919             for(i=0; i<64; i++){
5920                 int w= weight[i];
5921             
5922                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
5923             }
5924 #ifdef REFINE_STATS
5925 STOP_TIMER("rem*w*w")}
5926 {START_TIMER
5927 #endif
5928             s->dsp.fdct(d1);
5929 #ifdef REFINE_STATS
5930 STOP_TIMER("dct")}
5931 #endif
5932         }
5933
5934         if(start_i){
5935             const int level= block[0];
5936             int change, old_coeff;
5937
5938             assert(s->mb_intra);
5939             
5940             old_coeff= q*level;
5941             
5942             for(change=-1; change<=1; change+=2){
5943                 int new_level= level + change;
5944                 int score, new_coeff;
5945                 
5946                 new_coeff= q*new_level;
5947                 if(new_coeff >= 2048 || new_coeff < 0)
5948                     continue;
5949
5950                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
5951                 if(score<best_score){
5952                     best_score= score;
5953                     best_coeff= 0;
5954                     best_change= change;
5955                     best_unquant_change= new_coeff - old_coeff;
5956                 }
5957             }
5958         }
5959         
5960         run=0;
5961         rle_index=0;
5962         run2= run_tab[rle_index++];
5963         prev_level=0;
5964         prev_run=0;
5965
5966         for(i=start_i; i<64; i++){
5967             int j= perm_scantable[i];
5968             const int level= block[j];
5969             int change, old_coeff;
5970
5971             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
5972                 break;
5973
5974             if(level){
5975                 if(level<0) old_coeff= qmul*level - qadd;
5976                 else        old_coeff= qmul*level + qadd;
5977                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
5978             }else{
5979                 old_coeff=0;
5980                 run2--;
5981                 assert(run2>=0 || i >= last_non_zero );
5982             }
5983             
5984             for(change=-1; change<=1; change+=2){
5985                 int new_level= level + change;
5986                 int score, new_coeff, unquant_change;
5987                 
5988                 score=0;
5989                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
5990                    continue;
5991
5992                 if(new_level){
5993                     if(new_level<0) new_coeff= qmul*new_level - qadd;
5994                     else            new_coeff= qmul*new_level + qadd;
5995                     if(new_coeff >= 2048 || new_coeff <= -2048)
5996                         continue;
5997                     //FIXME check for overflow
5998                     
5999                     if(level){
6000                         if(level < 63 && level > -63){
6001                             if(i < last_non_zero)
6002                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6003                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6004                             else
6005                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6006                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6007                         }
6008                     }else{
6009                         assert(ABS(new_level)==1);
6010                         
6011                         if(analyze_gradient){
6012                             int g= d1[ scantable[i] ];
6013                             if(g && (g^new_level) >= 0)
6014                                 continue;
6015                         }
6016
6017                         if(i < last_non_zero){
6018                             int next_i= i + run2 + 1;
6019                             int next_level= block[ perm_scantable[next_i] ] + 64;
6020                             
6021                             if(next_level&(~127))
6022                                 next_level= 0;
6023
6024                             if(next_i < last_non_zero)
6025                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6026                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6027                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6028                             else
6029                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6030                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6031                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6032                         }else{
6033                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6034                             if(prev_level){
6035                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6036                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6037                             }
6038                         }
6039                     }
6040                 }else{
6041                     new_coeff=0;
6042                     assert(ABS(level)==1);
6043
6044                     if(i < last_non_zero){
6045                         int next_i= i + run2 + 1;
6046                         int next_level= block[ perm_scantable[next_i] ] + 64;
6047                             
6048                         if(next_level&(~127))
6049                             next_level= 0;
6050
6051                         if(next_i < last_non_zero)
6052                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6053                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6054                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6055                         else
6056                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6057                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6058                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6059                     }else{
6060                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6061                         if(prev_level){
6062                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6063                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6064                         }
6065                     }
6066                 }
6067                 
6068                 score *= lambda;
6069
6070                 unquant_change= new_coeff - old_coeff;
6071                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6072                 
6073                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6074                 if(score<best_score){
6075                     best_score= score;
6076                     best_coeff= i;
6077                     best_change= change;
6078                     best_unquant_change= unquant_change;
6079                 }
6080             }
6081             if(level){
6082                 prev_level= level + 64;
6083                 if(prev_level&(~127))
6084                     prev_level= 0;
6085                 prev_run= run;
6086                 run=0;
6087             }else{
6088                 run++;
6089             }
6090         }
6091 #ifdef REFINE_STATS
6092 STOP_TIMER("iterative step")}
6093 #endif
6094
6095         if(best_change){
6096             int j= perm_scantable[ best_coeff ];
6097             
6098             block[j] += best_change;
6099             
6100             if(best_coeff > last_non_zero){
6101                 last_non_zero= best_coeff;
6102                 assert(block[j]);
6103 #ifdef REFINE_STATS
6104 after_last++;
6105 #endif
6106             }else{
6107 #ifdef REFINE_STATS
6108 if(block[j]){
6109     if(block[j] - best_change){
6110         if(ABS(block[j]) > ABS(block[j] - best_change)){
6111             raise++;
6112         }else{
6113             lower++;
6114         }
6115     }else{
6116         from_zero++;
6117     }
6118 }else{
6119     to_zero++;
6120 }
6121 #endif
6122                 for(; last_non_zero>=start_i; last_non_zero--){
6123                     if(block[perm_scantable[last_non_zero]])
6124                         break;
6125                 }
6126             }
6127 #ifdef REFINE_STATS
6128 count++;
6129 if(256*256*256*64 % count == 0){
6130     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6131 }
6132 #endif
6133             run=0;
6134             rle_index=0;
6135             for(i=start_i; i<=last_non_zero; i++){
6136                 int j= perm_scantable[i];
6137                 const int level= block[j];
6138         
6139                  if(level){
6140                      run_tab[rle_index++]=run;
6141                      run=0;
6142                  }else{
6143                      run++;
6144                  }
6145             }
6146             
6147             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6148         }else{
6149             break;
6150         }
6151     }
6152 #ifdef REFINE_STATS
6153 if(last_non_zero>0){
6154 STOP_TIMER("iterative search")
6155 }
6156 }
6157 #endif
6158
6159     return last_non_zero;
6160 }
6161
/**
 * Forward-quantize one 8x8 block (reference C implementation).
 *
 * Runs the forward DCT, an optional denoising pass, then quantizes:
 * the intra DC term with the per-plane DC scale, and every AC term with
 * the precomputed quantization matrix for this qscale plus a rounding bias.
 *
 * @param block    spatial-domain samples on input, quantized coefficients on output
 * @param n        block number; n < 4 selects the luma DC scale, else chroma
 * @param qscale   quantizer scale, selects the precomputed matrix and bias
 * @param overflow set to nonzero if a coefficient magnitude may exceed
 *                 s->max_qcoeff (caller is expected to handle the overflow)
 * @return scan-order index of the last non-zero coefficient
 *         (0 for an intra block with no AC, -1 for an all-zero inter block)
 */
static int dct_quantize_c(MpegEncContext *s, 
                        DCTELEM *block, int n,
                        int qscale, int *overflow)
{
    int i, j, level, last_non_zero, q, start_i;
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    int bias;
    int max=0;
    unsigned int threshold1, threshold2;

    s->dsp.fdct (block);

    if(s->dct_error_sum)
        s->denoise_dct(s, block);

    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            
        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    }
    /* a coefficient quantizes to zero when |level| <= threshold1; the
       unsigned-add trick below tests both signs with a single compare */
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);
    /* scan backwards to find the last significant coefficient, zeroing
       the trailing insignificant ones on the way */
    for(i=63;i>=start_i;i--) {
        j = scantable[i];
        level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }else{
            block[j]=0;
        }
    }
    /* quantize the remaining coefficients in scan order */
    for(i=start_i; i<=last_non_zero; i++) {
        j = scantable[i];
        level = block[j] * qmat[j];

//        if(   bias+level >= (1<<QMAT_SHIFT)
//           || bias-level >= (1<<QMAT_SHIFT)){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                block[j]= level;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                block[j]= -level;
            }
            /* OR of all magnitudes; enough to detect a possible overflow */
            max |=level;
        }else{
            block[j]=0;
        }
    }
    *overflow= s->max_qcoeff < max; //overflow might have happened
    
    /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
    if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
        ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);

    return last_non_zero;
}
6241
6242 #endif //CONFIG_ENCODERS
6243
6244 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
6245                                    DCTELEM *block, int n, int qscale)
6246 {
6247     int i, level, nCoeffs;
6248     const uint16_t *quant_matrix;
6249
6250     nCoeffs= s->block_last_index[n];
6251     
6252     if (n < 4) 
6253         block[0] = block[0] * s->y_dc_scale;
6254     else
6255         block[0] = block[0] * s->c_dc_scale;
6256     /* XXX: only mpeg1 */
6257     quant_matrix = s->intra_matrix;
6258     for(i=1;i<=nCoeffs;i++) {
6259         int j= s->intra_scantable.permutated[i];
6260         level = block[j];
6261         if (level) {
6262             if (level < 0) {
6263                 level = -level;
6264                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6265                 level = (level - 1) | 1;
6266                 level = -level;
6267             } else {
6268                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6269                 level = (level - 1) | 1;
6270             }
6271             block[j] = level;
6272         }
6273     }
6274 }
6275
6276 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
6277                                    DCTELEM *block, int n, int qscale)
6278 {
6279     int i, level, nCoeffs;
6280     const uint16_t *quant_matrix;
6281
6282     nCoeffs= s->block_last_index[n];
6283     
6284     quant_matrix = s->inter_matrix;
6285     for(i=0; i<=nCoeffs; i++) {
6286         int j= s->intra_scantable.permutated[i];
6287         level = block[j];
6288         if (level) {
6289             if (level < 0) {
6290                 level = -level;
6291                 level = (((level << 1) + 1) * qscale *
6292                          ((int) (quant_matrix[j]))) >> 4;
6293                 level = (level - 1) | 1;
6294                 level = -level;
6295             } else {
6296                 level = (((level << 1) + 1) * qscale *
6297                          ((int) (quant_matrix[j]))) >> 4;
6298                 level = (level - 1) | 1;
6299             }
6300             block[j] = level;
6301         }
6302     }
6303 }
6304
6305 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, 
6306                                    DCTELEM *block, int n, int qscale)
6307 {
6308     int i, level, nCoeffs;
6309     const uint16_t *quant_matrix;
6310
6311     if(s->alternate_scan) nCoeffs= 63;
6312     else nCoeffs= s->block_last_index[n];
6313     
6314     if (n < 4) 
6315         block[0] = block[0] * s->y_dc_scale;
6316     else
6317         block[0] = block[0] * s->c_dc_scale;
6318     quant_matrix = s->intra_matrix;
6319     for(i=1;i<=nCoeffs;i++) {
6320         int j= s->intra_scantable.permutated[i];
6321         level = block[j];
6322         if (level) {
6323             if (level < 0) {
6324                 level = -level;
6325                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6326                 level = -level;
6327             } else {
6328                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6329             }
6330             block[j] = level;
6331         }
6332     }
6333 }
6334
6335 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, 
6336                                    DCTELEM *block, int n, int qscale)
6337 {
6338     int i, level, nCoeffs;
6339     const uint16_t *quant_matrix;
6340     int sum=-1;
6341
6342     if(s->alternate_scan) nCoeffs= 63;
6343     else nCoeffs= s->block_last_index[n];
6344     
6345     quant_matrix = s->inter_matrix;
6346     for(i=0; i<=nCoeffs; i++) {
6347         int j= s->intra_scantable.permutated[i];
6348         level = block[j];
6349         if (level) {
6350             if (level < 0) {
6351                 level = -level;
6352                 level = (((level << 1) + 1) * qscale *
6353                          ((int) (quant_matrix[j]))) >> 4;
6354                 level = -level;
6355             } else {
6356                 level = (((level << 1) + 1) * qscale *
6357                          ((int) (quant_matrix[j]))) >> 4;
6358             }
6359             block[j] = level;
6360             sum+=level;
6361         }
6362     }
6363     block[63]^=sum&1;
6364 }
6365
6366 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
6367                                   DCTELEM *block, int n, int qscale)
6368 {
6369     int i, level, qmul, qadd;
6370     int nCoeffs;
6371     
6372     assert(s->block_last_index[n]>=0);
6373     
6374     qmul = qscale << 1;
6375     
6376     if (!s->h263_aic) {
6377         if (n < 4) 
6378             block[0] = block[0] * s->y_dc_scale;
6379         else
6380             block[0] = block[0] * s->c_dc_scale;
6381         qadd = (qscale - 1) | 1;
6382     }else{
6383         qadd = 0;
6384     }
6385     if(s->ac_pred)
6386         nCoeffs=63;
6387     else
6388         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6389
6390     for(i=1; i<=nCoeffs; i++) {
6391         level = block[i];
6392         if (level) {
6393             if (level < 0) {
6394                 level = level * qmul - qadd;
6395             } else {
6396                 level = level * qmul + qadd;
6397             }
6398             block[i] = level;
6399         }
6400     }
6401 }
6402
6403 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
6404                                   DCTELEM *block, int n, int qscale)
6405 {
6406     int i, level, qmul, qadd;
6407     int nCoeffs;
6408     
6409     assert(s->block_last_index[n]>=0);
6410     
6411     qadd = (qscale - 1) | 1;
6412     qmul = qscale << 1;
6413     
6414     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6415
6416     for(i=0; i<=nCoeffs; i++) {
6417         level = block[i];
6418         if (level) {
6419             if (level < 0) {
6420                 level = level * qmul - qadd;
6421             } else {
6422                 level = level * qmul + qadd;
6423             }
6424             block[i] = level;
6425         }
6426     }
6427 }
6428
6429 #ifdef CONFIG_ENCODERS
/* Encoder registrations. All of these share the MPV_* entry points and a
   MpegEncContext private context; the codec id selects the actual bitstream
   syntax. Fields are positional, following the AVCodec declaration in
   avcodec.h. All accept YUV 4:2:0 input (MJPEG uses the full-range
   YUVJ420P variant). */

/* H.263 */
AVCodec h263_encoder = {
    "h263",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* H.263+ (H.263 with optional extensions) */
AVCodec h263p_encoder = {
    "h263p",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263P,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* Flash Video (Sorenson H.263 variant) */
AVCodec flv_encoder = {
    "flv",
    CODEC_TYPE_VIDEO,
    CODEC_ID_FLV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* RealVideo 1.0 */
AVCodec rv10_encoder = {
    "rv10",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV10,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* RealVideo 2.0 */
AVCodec rv20_encoder = {
    "rv20",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV20,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* MPEG-4 part 2; CODEC_CAP_DELAY because B-frames delay the output */
AVCodec mpeg4_encoder = {
    "mpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG4,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
    .capabilities= CODEC_CAP_DELAY,
};

/* Microsoft MPEG-4 version 1 */
AVCodec msmpeg4v1_encoder = {
    "msmpeg4v1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* Microsoft MPEG-4 version 2 */
AVCodec msmpeg4v2_encoder = {
    "msmpeg4v2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* Microsoft MPEG-4 version 3 (the common "DivX 3" syntax) */
AVCodec msmpeg4v3_encoder = {
    "msmpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V3,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* Windows Media Video 7 */
AVCodec wmv1_encoder = {
    "wmv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* Motion JPEG; note the full-range YUVJ420P pixel format */
AVCodec mjpeg_encoder = {
    "mjpeg",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MJPEG,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
};
6551
6552 #endif //CONFIG_ENCODERS