]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
IWMMXT configure support + runtime selection patch by (Gildas Bazin, gbazin : altern...
[ffmpeg] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22  
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */ 
27  
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
55                                   DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57 #ifdef CONFIG_ENCODERS
58 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63 #endif //CONFIG_ENCODERS
64
65 #ifdef HAVE_XVMC
66 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67 extern void XVMC_field_end(MpegEncContext *s);
68 extern void XVMC_decode_mb(MpegEncContext *s);
69 #endif
70
71 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72
73
74 /* enable all paranoid tests for rounding, overflows, etc... */
75 //#define PARANOID
76
77 //#define DEBUG
78
79
80 /* for jpeg fast DCT */
81 #define CONST_BITS 14
82
/* Post-scale factors of the AAN fast DCT, in 2.14 fixed point.
 * convert_matrix() folds these into the quantizer multipliers when the
 * selected fDCT (fdct_ifast / non-postscaled faandct) leaves its output
 * unscaled. Indexed in raster order. */
static const uint16_t aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
94
/* H.263 chroma rounding table: maps a 4-bit value to its rounded
 * contribution (0, 1 or 2). NOTE(review): presumably used when deriving
 * chroma motion vectors from luma MVs — confirm against the callers,
 * which are outside this chunk. */
static const uint8_t h263_chroma_roundtab[16] = {
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};
99
/* Identity chroma qscale mapping (chroma qscale == luma qscale);
 * installed as the default in MPV_common_defaults(). */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
104
105 #ifdef CONFIG_ENCODERS
/* Shared MV-penalty table for all encoder instances; lazily allocated
 * in MPV_encode_defaults(). */
static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
/* Default fcode lookup table; zeroed, then set to 1 for |mv| < 16 in
 * MPV_encode_defaults(). */
static uint8_t default_fcode_tab[MAX_MV*2+1];

/* -1 terminated list of the encoder's accepted input pixel formats */
enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
110
111 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
112                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
113 {
114     int qscale;
115     int shift=0;
116
117     for(qscale=qmin; qscale<=qmax; qscale++){
118         int i;
119         if (dsp->fdct == ff_jpeg_fdct_islow 
120 #ifdef FAAN_POSTSCALE
121             || dsp->fdct == ff_faandct
122 #endif
123             ) {
124             for(i=0;i<64;i++) {
125                 const int j= dsp->idct_permutation[i];
126                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
127                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
128                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
129                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
130                 
131                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
132                                 (qscale * quant_matrix[j]));
133             }
134         } else if (dsp->fdct == fdct_ifast
135 #ifndef FAAN_POSTSCALE
136                    || dsp->fdct == ff_faandct
137 #endif
138                    ) {
139             for(i=0;i<64;i++) {
140                 const int j= dsp->idct_permutation[i];
141                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
142                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
143                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
144                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
145                 
146                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
147                                 (aanscales[i] * qscale * quant_matrix[j]));
148             }
149         } else {
150             for(i=0;i<64;i++) {
151                 const int j= dsp->idct_permutation[i];
152                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
153                    So 16           <= qscale * quant_matrix[i]             <= 7905
154                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
155                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
156                 */
157                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
158 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
159                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
160
161                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
162                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
163             }
164         }
165         
166         for(i=intra; i<64; i++){
167             int64_t max= 8191;
168             if (dsp->fdct == fdct_ifast
169 #ifndef FAAN_POSTSCALE
170                    || dsp->fdct == ff_faandct
171 #endif
172                    ) {
173                 max= (8191LL*aanscales[i]) >> 14;
174             }
175             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){ 
176                 shift++;
177             }
178         }
179     }
180     if(shift){
181         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
182     }
183 }
184
185 static inline void update_qscale(MpegEncContext *s){
186     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
187     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
188     
189     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
190 }
191 #endif //CONFIG_ENCODERS
192
193 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
194     int i;
195     int end;
196     
197     st->scantable= src_scantable;
198
199     for(i=0; i<64; i++){
200         int j;
201         j = src_scantable[i];
202         st->permutated[i] = permutation[j];
203 #ifdef ARCH_POWERPC
204         st->inverse[j] = i;
205 #endif
206     }
207     
208     end=-1;
209     for(i=0; i<64; i++){
210         int j;
211         j = st->permutated[i];
212         if(j>end) end=j;
213         st->raster_end[i]= end;
214     }
215 }
216
217 #ifdef CONFIG_ENCODERS
218 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
219     int i;
220
221     if(matrix){
222         put_bits(pb, 1, 1);
223         for(i=0;i<64;i++) {
224             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
225         }
226     }else
227         put_bits(pb, 1, 0);
228 }
229 #endif //CONFIG_ENCODERS
230
/* init common dct for both encoder and decoder */
int DCT_common_init(MpegEncContext *s)
{
    /* Install the portable C reference implementations first; the
       platform-specific init calls below may replace them with
       optimized versions. */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS
        
#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* keep whatever the platform init installed as the fast variant */
    s->fast_dct_quantize= s->dct_quantize;

    /* trellis quantization overrides the platform quantizer */
    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones 
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
289
290 static void copy_picture(Picture *dst, Picture *src){
291     *dst = *src;
292     dst->type= FF_BUFFER_TYPE_COPY;
293 }
294
295 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
296     int i;
297
298     dst->pict_type              = src->pict_type;
299     dst->quality                = src->quality;
300     dst->coded_picture_number   = src->coded_picture_number;
301     dst->display_picture_number = src->display_picture_number;
302 //    dst->reference              = src->reference;
303     dst->pts                    = src->pts;
304     dst->interlaced_frame       = src->interlaced_frame;
305     dst->top_field_first        = src->top_field_first;
306
307     if(s->avctx->me_threshold){
308         if(!src->motion_val[0])
309             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
310         if(!src->mb_type)
311             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
312         if(!src->ref_index[0])
313             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
314         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
315             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
316             src->motion_subsample_log2, dst->motion_subsample_log2);
317
318         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
319         
320         for(i=0; i<2; i++){
321             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
322             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
323
324             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
325                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
326             }
327             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
328                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
329             }
330         }
331     }
332 }
333
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 * The per-MB side tables (qscale, mb_type, motion vectors, ...) are
 * allocated here on first use. Returns 0 on success, -1 on failure
 * (partial allocations are freed later through free_picture()).
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;
    
    if(shared){
        /* the pixel buffers come from the caller; just mark the type */
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;
        
        assert(!pic->data[0]);
        
        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
        
        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* strides must stay constant for the lifetime of the context */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }
    
    /* first use of this Picture: allocate the per-MB side tables.
       CHECKED_ALLOCZ jumps to "fail" below on allocation failure. */
    if(pic->qscale_table==NULL){
        if (s->encoding) {        
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* H.264 stores motion at 4x4 granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* 8x8 granularity for H.263-family codecs and debugging */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
    
    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
418
419 /**
420  * deallocates a picture
421  */
422 static void free_picture(MpegEncContext *s, Picture *pic){
423     int i;
424
425     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
426         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
427     }
428
429     av_freep(&pic->mb_var);
430     av_freep(&pic->mc_mb_var);
431     av_freep(&pic->mb_mean);
432     av_freep(&pic->mbskip_table);
433     av_freep(&pic->qscale_table);
434     av_freep(&pic->mb_type_base);
435     av_freep(&pic->dct_coeff);
436     av_freep(&pic->pan_scan);
437     pic->mb_type= NULL;
438     for(i=0; i<2; i++){
439         av_freep(&pic->motion_val_base[i]);
440         av_freep(&pic->ref_index[i]);
441     }
442     
443     if(pic->type == FF_BUFFER_TYPE_SHARED){
444         for(i=0; i<4; i++){
445             pic->base[i]=
446             pic->data[i]= NULL;
447         }
448         pic->type= 0;        
449     }
450 }
451
/* Allocates the per-thread scratch buffers of an MpegEncContext.
 * Returns 0 on success, -1 on allocation failure (CHECKED_ALLOCZ jumps
 * to "fail"; partially allocated buffers are released later by
 * MPV_common_end() / free_duplicate_context()). */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264) 
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t)) 
    /* the three scratchpads alias the same allocation */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }   
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    /* pblocks point into this thread's own block array */
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
481
482 static void free_duplicate_context(MpegEncContext *s){
483     if(s==NULL) return;
484
485     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
486     av_freep(&s->me.scratchpad);
487     s->rd_scratchpad=   
488     s->b_scratchpad=    
489     s->obmc_scratchpad= NULL;
490     
491     av_freep(&s->dct_error_sum);
492     av_freep(&s->me.map);
493     av_freep(&s->me.score_map);
494     av_freep(&s->blocks);
495     s->block= NULL;
496 }
497
498 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
499 #define COPY(a) bak->a= src->a
500     COPY(allocated_edge_emu_buffer);
501     COPY(edge_emu_buffer);
502     COPY(me.scratchpad);
503     COPY(rd_scratchpad);
504     COPY(b_scratchpad);
505     COPY(obmc_scratchpad);
506     COPY(me.map);
507     COPY(me.score_map);
508     COPY(blocks);
509     COPY(block);
510     COPY(start_mb_y);
511     COPY(end_mb_y);
512     COPY(me.map_generation);
513     COPY(pb);
514     COPY(dct_error_sum);
515     COPY(dct_count[0]);
516     COPY(dct_count[1]);
517 #undef COPY
518 }
519
520 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
521     MpegEncContext bak;
522     int i;
523     //FIXME copy only needed parts
524 //START_TIMER
525     backup_duplicate_context(&bak, dst);
526     memcpy(dst, src, sizeof(MpegEncContext));
527     backup_duplicate_context(dst, &bak);
528     for(i=0;i<12;i++){
529         dst->pblocks[i] = (short *)(&dst->block[i]);
530     }
531 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
532 }
533
534 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
535 #define COPY(a) dst->a= src->a
536     COPY(pict_type);
537     COPY(current_picture);
538     COPY(f_code);
539     COPY(b_code);
540     COPY(qscale);
541     COPY(lambda);
542     COPY(lambda2);
543     COPY(picture_in_gop_number);
544     COPY(gop_picture_number);
545     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
546     COPY(progressive_frame); //FIXME don't set in encode_header
547     COPY(partitioned_frame); //FIXME don't set in encode_header
548 #undef COPY
549 }
550
551 /**
552  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
553  * the changed fields will not depend upon the prior state of the MpegEncContext.
554  */
555 static void MPV_common_defaults(MpegEncContext *s){
556     s->y_dc_scale_table=
557     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
558     s->chroma_qscale_table= ff_default_chroma_qscale_table;
559     s->progressive_frame= 1;
560     s->progressive_sequence= 1;
561     s->picture_structure= PICT_FRAME;
562
563     s->coded_picture_number = 0;
564     s->picture_number = 0;
565     s->input_picture_number = 0;
566
567     s->picture_in_gop_number = 0;
568
569     s->f_code = 1;
570     s->b_code = 1;
571 }
572
/**
 * sets the given MpegEncContext to defaults for decoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 */
void MPV_decode_defaults(MpegEncContext *s){
    MPV_common_defaults(s); // decoding currently needs no defaults beyond the common ones
}
580
/**
 * sets the given MpegEncContext to defaults for encoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 */

#ifdef CONFIG_ENCODERS
static void MPV_encode_defaults(MpegEncContext *s){
    /* the shared penalty/fcode tables are built only once per process */
    static int tables_initialized = 0;

    MPV_common_defaults(s);

    if (!tables_initialized) {
        int mv;

        tables_initialized = 1;

        default_mv_penalty = av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
        memset(default_fcode_tab, 0, sizeof(uint8_t)*(2*MAX_MV+1));

        /* fcode 1 covers motion vectors in [-16,16) */
        for (mv = -16; mv < 16; mv++)
            default_fcode_tab[mv + MAX_MV] = 1;
    }
    s->me.mv_penalty = default_mv_penalty;
    s->fcode_tab     = default_fcode_tab;
}
#endif //CONFIG_ENCODERS
606 #endif //CONFIG_ENCODERS
607
/** 
 * init common structure for both encoder and decoder.
 * this assumes that some variables like width/height are already set.
 * Returns 0 on success, -1 on error; on allocation failure everything
 * allocated so far is released through MPV_common_end().
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;

    /* each slice thread needs at least one MB row of its own */
    if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    DCT_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* derived macroblock geometry; strides have one extra column so
       edge accesses stay in bounds */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_height = (s->height + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;
    
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;
 
    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;
    
    /* convert fourcc to upper case */
    s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)          
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16) 
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    /* mapping from linear MB index to stride-based xy position;
       CHECKED_ALLOCZ jumps to "fail" on allocation failure */
    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
    
    if (s->encoding) {
        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        /* the usable tables start one row + one column into the base
           allocations so neighbouring-MB accesses stay in bounds */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
        
        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
        
        /* quantizer multiplier tables, filled by convert_matrix() */
        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
    
    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;
        
        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;
        
        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }
    
    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);
    
    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
    
    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread context 0 is the master context itself; the others are
       full copies that get their own scratch buffers below */
    s->thread_context[0]= s;
    for(i=1; i<s->avctx->thread_count; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    for(i=0; i<s->avctx->thread_count; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        /* divide the MB rows roughly evenly between the threads */
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
799
/* free everything allocated by MPV_common_init() (common to encoder and
   decoder); safe to call on a partially-initialized context, which is how
   the MPV_common_init() failure path uses it */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* per-thread duplicate contexts: slot 0 aliases s itself, so it is
       cleaned but not freed; slots 1..n-1 were av_malloc()ed separately */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* motion vector tables: only the *_base pointers own memory (the
       public pointers are offsets into them), so NULL the aliases after
       freeing the bases */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* field-prediction MV tables, indexed [direction][field][list] */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    /* prediction state (dc_val/ac_val/coded_block are offsets into the
       *_base allocations, matching the alloc side in MPV_common_init) */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* release every picture's buffers before freeing the array itself */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    /* clear dangling references into the just-freed picture array */
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
883
884 #ifdef CONFIG_ENCODERS
885
886 /* init video encoder */
887 int MPV_encode_init(AVCodecContext *avctx)
888 {
889     MpegEncContext *s = avctx->priv_data;
890     int i, dummy;
891     int chroma_h_shift, chroma_v_shift;
892     
893     MPV_encode_defaults(s);
894
895     if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
896         av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
897         return -1;
898     }
899
900     if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
901         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){
902             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
903             return -1;
904         }
905     }else{
906         if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){
907             av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
908             return -1;
909         }
910     }
911
912     s->bit_rate = avctx->bit_rate;
913     s->width = avctx->width;
914     s->height = avctx->height;
915     if(avctx->gop_size > 600){
916         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
917         avctx->gop_size=600;
918     }
919     s->gop_size = avctx->gop_size;
920     s->avctx = avctx;
921     s->flags= avctx->flags;
922     s->flags2= avctx->flags2;
923     s->max_b_frames= avctx->max_b_frames;
924     s->codec_id= avctx->codec->id;
925     s->luma_elim_threshold  = avctx->luma_elim_threshold;
926     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
927     s->strict_std_compliance= avctx->strict_std_compliance;
928     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
929     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
930     s->mpeg_quant= avctx->mpeg_quant;
931     s->rtp_mode= !!avctx->rtp_payload_size;
932     s->intra_dc_precision= avctx->intra_dc_precision;
933     s->user_specified_pts = AV_NOPTS_VALUE;
934
935     if (s->gop_size <= 1) {
936         s->intra_only = 1;
937         s->gop_size = 12;
938     } else {
939         s->intra_only = 0;
940     }
941
942     s->me_method = avctx->me_method;
943
944     /* Fixed QSCALE */
945     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
946     
947     s->adaptive_quant= (   s->avctx->lumi_masking
948                         || s->avctx->dark_masking
949                         || s->avctx->temporal_cplx_masking 
950                         || s->avctx->spatial_cplx_masking
951                         || s->avctx->p_masking
952                         || s->avctx->border_masking
953                         || (s->flags&CODEC_FLAG_QP_RD))
954                        && !s->fixed_qscale;
955     
956     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
957     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
958     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
959
960     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
961         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
962         return -1;
963     }    
964
965     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
966         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
967     }
968     
969     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
970         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
971         return -1;
972     }
973     
974     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
975         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
976         return -1;
977     }
978         
979     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate 
980        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
981        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
982         
983         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
984     }
985        
986     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
987        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
988         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
989         return -1;
990     }
991         
992     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
993         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
994         return -1;
995     }
996     
997     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
998         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
999         return -1;
1000     }
1001     
1002     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1003         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1004         return -1;
1005     }
1006
1007     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1008         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1009         return -1;
1010     }
1011     
1012     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1013         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1014         return -1;
1015     }
1016
1017     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN)) 
1018        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1019         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1020         return -1;
1021     }
1022         
1023     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1024         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1025         return -1;
1026     }
1027         
1028     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1029         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1030         return -1;
1031     }
1032
1033     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1034         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1035         return -1;
1036     }
1037     
1038     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1039         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1040         return -1;
1041     }
1042     
1043     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4 
1044        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO 
1045        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1046         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1047         return -1;
1048     }
1049     
1050     if(s->avctx->thread_count > 1)
1051         s->rtp_mode= 1;
1052
1053     if(!avctx->time_base.den || !avctx->time_base.num){
1054         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1055         return -1;
1056     }
1057     
1058     i= (INT_MAX/2+128)>>8;
1059     if(avctx->me_threshold >= i){
1060         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1061         return -1;
1062     }
1063     if(avctx->mb_threshold >= i){
1064         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1065         return -1;
1066     }
1067         
1068     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1069         av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass");
1070         return -1;
1071     }
1072
1073     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1074     if(i > 1){
1075         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1076         avctx->time_base.den /= i;
1077         avctx->time_base.num /= i;
1078 //        return -1;
1079     }
1080     
1081     if(s->codec_id==CODEC_ID_MJPEG){
1082         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1083         s->inter_quant_bias= 0;
1084     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1085         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1086         s->inter_quant_bias= 0;
1087     }else{
1088         s->intra_quant_bias=0;
1089         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1090     }
1091     
1092     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1093         s->intra_quant_bias= avctx->intra_quant_bias;
1094     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1095         s->inter_quant_bias= avctx->inter_quant_bias;
1096         
1097     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1098
1099     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1100         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1101         return -1;        
1102     }
1103     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1104
1105     switch(avctx->codec->id) {
1106     case CODEC_ID_MPEG1VIDEO:
1107         s->out_format = FMT_MPEG1;
1108         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1109         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1110         break;
1111     case CODEC_ID_MPEG2VIDEO:
1112         s->out_format = FMT_MPEG1;
1113         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1114         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1115         s->rtp_mode= 1;
1116         break;
1117     case CODEC_ID_LJPEG:
1118     case CODEC_ID_MJPEG:
1119         s->out_format = FMT_MJPEG;
1120         s->intra_only = 1; /* force intra only for jpeg */
1121         s->mjpeg_write_tables = 1; /* write all tables */
1122         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1123         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1124         s->mjpeg_vsample[1] = 1;
1125         s->mjpeg_vsample[2] = 1; 
1126         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1127         s->mjpeg_hsample[1] = 1; 
1128         s->mjpeg_hsample[2] = 1; 
1129         if (mjpeg_init(s) < 0)
1130             return -1;
1131         avctx->delay=0;
1132         s->low_delay=1;
1133         break;
1134     case CODEC_ID_H261:
1135         s->out_format = FMT_H261;
1136         avctx->delay=0;
1137         s->low_delay=1;
1138         break;
1139     case CODEC_ID_H263:
1140         if (h263_get_picture_format(s->width, s->height) == 7) {
1141             av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
1142             return -1;
1143         }
1144         s->out_format = FMT_H263;
1145         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1146         avctx->delay=0;
1147         s->low_delay=1;
1148         break;
1149     case CODEC_ID_H263P:
1150         s->out_format = FMT_H263;
1151         s->h263_plus = 1;
1152         /* Fx */
1153         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1154         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1155         s->modified_quant= s->h263_aic;
1156         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1157         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1158         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1159         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1160         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1161
1162         /* /Fx */
1163         /* These are just to be sure */
1164         avctx->delay=0;
1165         s->low_delay=1;
1166         break;
1167     case CODEC_ID_FLV1:
1168         s->out_format = FMT_H263;
1169         s->h263_flv = 2; /* format = 1; 11-bit codes */
1170         s->unrestricted_mv = 1;
1171         s->rtp_mode=0; /* don't allow GOB */
1172         avctx->delay=0;
1173         s->low_delay=1;
1174         break;
1175     case CODEC_ID_RV10:
1176         s->out_format = FMT_H263;
1177         avctx->delay=0;
1178         s->low_delay=1;
1179         break;
1180     case CODEC_ID_RV20:
1181         s->out_format = FMT_H263;
1182         avctx->delay=0;
1183         s->low_delay=1;
1184         s->modified_quant=1;
1185         s->h263_aic=1;
1186         s->h263_plus=1;
1187         s->loop_filter=1;
1188         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1189         break;
1190     case CODEC_ID_MPEG4:
1191         s->out_format = FMT_H263;
1192         s->h263_pred = 1;
1193         s->unrestricted_mv = 1;
1194         s->low_delay= s->max_b_frames ? 0 : 1;
1195         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1196         break;
1197     case CODEC_ID_MSMPEG4V1:
1198         s->out_format = FMT_H263;
1199         s->h263_msmpeg4 = 1;
1200         s->h263_pred = 1;
1201         s->unrestricted_mv = 1;
1202         s->msmpeg4_version= 1;
1203         avctx->delay=0;
1204         s->low_delay=1;
1205         break;
1206     case CODEC_ID_MSMPEG4V2:
1207         s->out_format = FMT_H263;
1208         s->h263_msmpeg4 = 1;
1209         s->h263_pred = 1;
1210         s->unrestricted_mv = 1;
1211         s->msmpeg4_version= 2;
1212         avctx->delay=0;
1213         s->low_delay=1;
1214         break;
1215     case CODEC_ID_MSMPEG4V3:
1216         s->out_format = FMT_H263;
1217         s->h263_msmpeg4 = 1;
1218         s->h263_pred = 1;
1219         s->unrestricted_mv = 1;
1220         s->msmpeg4_version= 3;
1221         s->flipflop_rounding=1;
1222         avctx->delay=0;
1223         s->low_delay=1;
1224         break;
1225     case CODEC_ID_WMV1:
1226         s->out_format = FMT_H263;
1227         s->h263_msmpeg4 = 1;
1228         s->h263_pred = 1;
1229         s->unrestricted_mv = 1;
1230         s->msmpeg4_version= 4;
1231         s->flipflop_rounding=1;
1232         avctx->delay=0;
1233         s->low_delay=1;
1234         break;
1235     case CODEC_ID_WMV2:
1236         s->out_format = FMT_H263;
1237         s->h263_msmpeg4 = 1;
1238         s->h263_pred = 1;
1239         s->unrestricted_mv = 1;
1240         s->msmpeg4_version= 5;
1241         s->flipflop_rounding=1;
1242         avctx->delay=0;
1243         s->low_delay=1;
1244         break;
1245     default:
1246         return -1;
1247     }
1248     
1249     avctx->has_b_frames= !s->low_delay;
1250
1251     s->encoding = 1;
1252
1253     /* init */
1254     if (MPV_common_init(s) < 0)
1255         return -1;
1256
1257     if(s->modified_quant)
1258         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1259     s->progressive_frame= 
1260     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1261     s->quant_precision=5;
1262     
1263     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1264     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1265     
1266 #ifdef CONFIG_H261_ENCODER
1267     if (s->out_format == FMT_H261)
1268         ff_h261_encode_init(s);
1269 #endif
1270     if (s->out_format == FMT_H263)
1271         h263_encode_init(s);
1272     if(s->msmpeg4_version)
1273         ff_msmpeg4_encode_init(s);
1274     if (s->out_format == FMT_MPEG1)
1275         ff_mpeg1_encode_init(s);
1276
1277     /* init q matrix */
1278     for(i=0;i<64;i++) {
1279         int j= s->dsp.idct_permutation[i];
1280         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1281             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1282             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1283         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1284             s->intra_matrix[j] =
1285             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1286         }else
1287         { /* mpeg1/2 */
1288             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1289             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1290         }
1291         if(s->avctx->intra_matrix)
1292             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1293         if(s->avctx->inter_matrix)
1294             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1295     }
1296
1297     /* precompute matrix */
1298     /* for mjpeg, we do include qscale in the matrix */
1299     if (s->out_format != FMT_MJPEG) {
1300         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
1301                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1302         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16, 
1303                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1304     }
1305
1306     if(ff_rate_control_init(s) < 0)
1307         return -1;
1308     
1309     return 0;
1310 }
1311
/**
 * Free all encoder state: rate control, the common MPV tables, the mjpeg
 * writer (when used) and the extradata buffer. Counterpart of
 * MPV_encode_init(); always returns 0.
 */
int MPV_encode_end(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;

#ifdef STATS
    print_stats();
#endif

    /* teardown mirrors the init order: rate control, common tables, mjpeg */
    ff_rate_control_uninit(s);

    MPV_common_end(s);
    if (s->out_format == FMT_MJPEG)
        mjpeg_close(s);

    av_freep(&avctx->extradata);

    return 0;
}
1330
1331 #endif //CONFIG_ENCODERS
1332
1333 void init_rl(RLTable *rl, int use_static)
1334 {
1335     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1336     uint8_t index_run[MAX_RUN+1];
1337     int last, run, level, start, end, i;
1338
1339     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1340     if(use_static && rl->max_level[0])
1341         return;
1342
1343     /* compute max_level[], max_run[] and index_run[] */
1344     for(last=0;last<2;last++) {
1345         if (last == 0) {
1346             start = 0;
1347             end = rl->last;
1348         } else {
1349             start = rl->last;
1350             end = rl->n;
1351         }
1352
1353         memset(max_level, 0, MAX_RUN + 1);
1354         memset(max_run, 0, MAX_LEVEL + 1);
1355         memset(index_run, rl->n, MAX_RUN + 1);
1356         for(i=start;i<end;i++) {
1357             run = rl->table_run[i];
1358             level = rl->table_level[i];
1359             if (index_run[run] == rl->n)
1360                 index_run[run] = i;
1361             if (level > max_level[run])
1362                 max_level[run] = level;
1363             if (run > max_run[level])
1364                 max_run[level] = run;
1365         }
1366         if(use_static)
1367             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1368         else
1369             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1370         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1371         if(use_static)
1372             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1373         else
1374             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1375         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1376         if(use_static)
1377             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1378         else
1379             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1380         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1381     }
1382 }
1383
/* Replicate the border pixels of a width x height image outwards by 'w'
   pixels on every side (motion estimation/compensation may reference
   samples outside the picture). buf points at the top-left image pixel;
   wrap is the line stride, which must leave at least w bytes of margin
   on each side and w lines above and below. */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *row;
    int y, i;

    /* extend every row to the left and right by repeating its edge pixel */
    row = buf;
    for (y = 0; y < height; y++) {
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* extend upwards and downwards by copying the already-extended first
       and last rows; since those rows now include the side fills, this
       also produces the four corner regions in one pass */
    for (i = 1; i <= w; i++) {
        memcpy(buf - i * wrap - w, buf - w, width + 2 * w);
        memcpy(buf + (height - 1 + i) * wrap - w,
               buf + (height - 1) * wrap - w, width + 2 * w);
    }
}
1412
1413 int ff_find_unused_picture(MpegEncContext *s, int shared){
1414     int i;
1415     
1416     if(shared){
1417         for(i=0; i<MAX_PICTURE_COUNT; i++){
1418             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1419         }
1420     }else{
1421         for(i=0; i<MAX_PICTURE_COUNT; i++){
1422             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1423         }
1424         for(i=0; i<MAX_PICTURE_COUNT; i++){
1425             if(s->picture[i].data[0]==NULL) return i;
1426         }
1427     }
1428
1429     assert(0);
1430     return -1;
1431 }
1432
1433 static void update_noise_reduction(MpegEncContext *s){
1434     int intra, i;
1435
1436     for(intra=0; intra<2; intra++){
1437         if(s->dct_count[intra] > (1<<16)){
1438             for(i=0; i<64; i++){
1439                 s->dct_error_sum[intra][i] >>=1;
1440             }
1441             s->dct_count[intra] >>= 1;
1442         }
1443         
1444         for(i=0; i<64; i++){
1445             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1446         }
1447     }
1448 }
1449
/**
 * Generic function for encode/decode, called after coding/decoding the
 * header and before a frame is coded/decoded. Picks/allocates the current
 * picture, rotates last/next reference pointers and selects the
 * dequantizer for this frame.
 * @return 0 on success, -1 if a picture buffer could not be allocated
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                /* a referenced picture that is neither next nor last should
                   not exist any more at this point — free it and complain */
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                }
            }
        }
    }
alloc:
    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        /* reference==3 marks a frame used for prediction; B frames and
           dropable frames are never referenced */
        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
                        && !s->dropable ? 3 : 0;

        pic->coded_picture_number= s->coded_picture_number++;

        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE)
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);

  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
    /* rotate the reference pictures: the previous "next" becomes "last",
       and (unless dropable) the current picture becomes "next" */
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/

    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);

    /* inter frame without a reference: warn and allocate anyway (the
       stream probably started on a non-keyframe) */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: address only every second line of the frame buffer */
    if(s->picture_structure!=PICT_FRAME){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            }
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }
  }

    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init isn't called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }

#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
1576
/**
 * Generic function for encode/decode, called after a frame has been
 * coded/decoded: pads the edges of the new reference frame, updates the
 * picture-type history, releases encoder buffers that are no longer
 * referenced and publishes the finished frame via avctx->coded_frame.
 */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            /* replicate border pixels so motion vectors may point outside the picture */
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c(); // leave MMX state so that following FPU code is safe

    /* remember the type of this frame for the next frame's decisions */
    s->last_pict_type    = s->pict_type;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1626
1627 /**
1628  * draws an line from (ex, ey) -> (sx, sy).
1629  * @param w width of the image
1630  * @param h height of the image
1631  * @param stride stride/linesize of the image
1632  * @param color color of the arrow
1633  */
1634 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1635     int t, x, y, fr, f;
1636     
1637     sx= clip(sx, 0, w-1);
1638     sy= clip(sy, 0, h-1);
1639     ex= clip(ex, 0, w-1);
1640     ey= clip(ey, 0, h-1);
1641     
1642     buf[sy*stride + sx]+= color;
1643     
1644     if(ABS(ex - sx) > ABS(ey - sy)){
1645         if(sx > ex){
1646             t=sx; sx=ex; ex=t;
1647             t=sy; sy=ey; ey=t;
1648         }
1649         buf+= sx + sy*stride;
1650         ex-= sx;
1651         f= ((ey-sy)<<16)/ex;
1652         for(x= 0; x <= ex; x++){
1653             y = (x*f)>>16;
1654             fr= (x*f)&0xFFFF;
1655             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1656             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1657         }
1658     }else{
1659         if(sy > ey){
1660             t=sx; sx=ex; ex=t;
1661             t=sy; sy=ey; ey=t;
1662         }
1663         buf+= sx + sy*stride;
1664         ey-= sy;
1665         if(ey) f= ((ex-sx)<<16)/ey;
1666         else   f= 0;
1667         for(y= 0; y <= ey; y++){
1668             x = (y*f)>>16;
1669             fr= (y*f)&0xFFFF;
1670             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1671             buf[y*stride + x+1]+= (color*         fr )>>16;;
1672         }
1673     }
1674 }
1675
1676 /**
1677  * draws an arrow from (ex, ey) -> (sx, sy).
1678  * @param w width of the image
1679  * @param h height of the image
1680  * @param stride stride/linesize of the image
1681  * @param color color of the arrow
1682  */
1683 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ 
1684     int dx,dy;
1685
1686     sx= clip(sx, -100, w+100);
1687     sy= clip(sy, -100, h+100);
1688     ex= clip(ex, -100, w+100);
1689     ey= clip(ey, -100, h+100);
1690     
1691     dx= ex - sx;
1692     dy= ey - sy;
1693     
1694     if(dx*dx + dy*dy > 3*3){
1695         int rx=  dx + dy;
1696         int ry= -dx + dy;
1697         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1698         
1699         //FIXME subpixel accuracy
1700         rx= ROUNDED_DIV(rx*3<<4, length);
1701         ry= ROUNDED_DIV(ry*3<<4, length);
1702         
1703         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1704         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1705     }
1706     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1707 }
1708
/**
 * Prints debugging info for the given picture: a textual per-macroblock
 * dump (skip run, qscale, MB type) and/or an in-picture visualization
 * (motion vectors, QP shading, MB-type coloring), selected by the
 * avctx->debug and avctx->debug_mv flags.
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        /* one text cell per macroblock */
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9; // keep the dump one digit wide
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* in-picture visualization: draw over a private copy of the frame */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        /* copy planes so the actual decoded picture is not modified */
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                /* motion vectors: one arrow per partition, per requested direction */
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  for(type=0; type<3; type++){
                    int direction = 0;
                    /* type 0: P forward, 1: B forward, 2: B backward */
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2; // field vectors are in field coordinates

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2; // field vectors are in field coordinates

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      /* 16x16: single vector from the MB center */
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                /* QP visualization: shade chroma proportionally to qscale */
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                /* MB-type visualization: color the chroma by type, mark partitions in luma */
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
                    /* pick a chroma (u,v) on a hue circle: theta in degrees, r = saturation */
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128;
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    /* replicate the byte across a whole 8-pixel chroma row */
                    u*= 0x0101010101010101ULL;
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
1969
1970 #ifdef CONFIG_ENCODERS
1971
1972 static int get_sae(uint8_t *src, int ref, int stride){
1973     int x,y;
1974     int acc=0;
1975     
1976     for(y=0; y<16; y++){
1977         for(x=0; x<16; x++){
1978             acc+= ABS(src[x+y*stride] - ref);
1979         }
1980     }
1981     
1982     return acc;
1983 }
1984
1985 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1986     int x, y, w, h;
1987     int acc=0;
1988     
1989     w= s->width &~15;
1990     h= s->height&~15;
1991     
1992     for(y=0; y<h; y+=16){
1993         for(x=0; x<w; x+=16){
1994             int offset= x + y*stride;
1995             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1996             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1997             int sae = get_sae(src + offset, mean, stride);
1998             
1999             acc+= sae + 500 < sad;
2000         }
2001     }
2002     return acc;
2003 }
2004
2005
2006 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
2007     AVFrame *pic=NULL;
2008     int64_t pts;
2009     int i;
2010     const int encoding_delay= s->max_b_frames;
2011     int direct=1;
2012     
2013     if(pic_arg){
2014         pts= pic_arg->pts;
2015         pic_arg->display_picture_number= s->input_picture_number++;
2016
2017         if(pts != AV_NOPTS_VALUE){ 
2018             if(s->user_specified_pts != AV_NOPTS_VALUE){
2019                 int64_t time= pts;
2020                 int64_t last= s->user_specified_pts;
2021             
2022                 if(time <= last){            
2023                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%Ld, last=%Ld\n", pts, s->user_specified_pts);
2024                     return -1;
2025                 }
2026             }
2027             s->user_specified_pts= pts;
2028         }else{
2029             if(s->user_specified_pts != AV_NOPTS_VALUE){
2030                 s->user_specified_pts= 
2031                 pts= s->user_specified_pts + 1;
2032                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%Ld)\n", pts);
2033             }else{
2034                 pts= pic_arg->display_picture_number;
2035             }
2036         }
2037     }
2038
2039   if(pic_arg){
2040     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2041     if(pic_arg->linesize[0] != s->linesize) direct=0;
2042     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2043     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2044   
2045 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2046     
2047     if(direct){
2048         i= ff_find_unused_picture(s, 1);
2049
2050         pic= (AVFrame*)&s->picture[i];
2051         pic->reference= 3;
2052     
2053         for(i=0; i<4; i++){
2054             pic->data[i]= pic_arg->data[i];
2055             pic->linesize[i]= pic_arg->linesize[i];
2056         }
2057         alloc_picture(s, (Picture*)pic, 1);
2058     }else{
2059         int offset= 16;
2060         i= ff_find_unused_picture(s, 0);
2061
2062         pic= (AVFrame*)&s->picture[i];
2063         pic->reference= 3;
2064
2065         alloc_picture(s, (Picture*)pic, 0);
2066
2067         if(   pic->data[0] + offset == pic_arg->data[0] 
2068            && pic->data[1] + offset == pic_arg->data[1]
2069            && pic->data[2] + offset == pic_arg->data[2]){
2070        // empty
2071         }else{
2072             int h_chroma_shift, v_chroma_shift;
2073             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2074         
2075             for(i=0; i<3; i++){
2076                 int src_stride= pic_arg->linesize[i];
2077                 int dst_stride= i ? s->uvlinesize : s->linesize;
2078                 int h_shift= i ? h_chroma_shift : 0;
2079                 int v_shift= i ? v_chroma_shift : 0;
2080                 int w= s->width >>h_shift;
2081                 int h= s->height>>v_shift;
2082                 uint8_t *src= pic_arg->data[i];
2083                 uint8_t *dst= pic->data[i] + offset;
2084             
2085                 if(src_stride==dst_stride)
2086                     memcpy(dst, src, src_stride*h);
2087                 else{
2088                     while(h--){
2089                         memcpy(dst, src, w);
2090                         dst += dst_stride;
2091                         src += src_stride;
2092                     }
2093                 }
2094             }
2095         }
2096     }
2097     copy_picture_attributes(s, pic, pic_arg);
2098     pic->pts= pts; //we set this here to avoid modifiying pic_arg
2099   }
2100   
2101     /* shift buffer entries */
2102     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2103         s->input_picture[i-1]= s->input_picture[i];
2104         
2105     s->input_picture[encoding_delay]= (Picture*)pic;
2106
2107     return 0;
2108 }
2109
2110 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2111     int x, y, plane;
2112     int score=0;
2113     int64_t score64=0;
2114
2115     for(plane=0; plane<3; plane++){
2116         const int stride= p->linesize[plane];
2117         const int bw= plane ? 1 : 2;
2118         for(y=0; y<s->mb_height*bw; y++){
2119             for(x=0; x<s->mb_width*bw; x++){
2120                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride), ref->data[plane] + 8*(x + y*stride), stride, 8);
2121                 
2122                 switch(s->avctx->frame_skip_exp){
2123                     case 0: score= FFMAX(score, v); break;
2124                     case 1: score+= ABS(v);break;
2125                     case 2: score+= v*v;break;
2126                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2127                     case 4: score64+= v*v*(int64_t)(v*v);break;
2128                 }
2129             }
2130         }
2131     }
2132     
2133     if(score) score64= score;
2134
2135     if(score64 < s->avctx->frame_skip_threshold)
2136         return 1;
2137     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2138         return 1;
2139     return 0;
2140 }
2141
/**
 * Picks the next picture to encode from the input queue and decides its
 * coding type: chooses how many B frames precede the next P/I frame,
 * applies two-pass and GOP constraints, handles frame skipping, and sets
 * up s->new_picture / s->current_picture for the encoder.
 */
static void select_input_picture(MpegEncContext *s){
    int i;

    /* shift the reorder queue down by one */
    for(i=1; i<MAX_PICTURE_COUNT; i++)
        s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
    s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;

    /* set next picture type & ordering */
    if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
            /* no reference yet (or intra-only mode): must code an I frame */
            s->reordered_input_picture[0]= s->input_picture[0];
            s->reordered_input_picture[0]->pict_type= I_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
        }else{
            int b_frames;

            /* frame skipping: drop the input if it is close enough to the last reference */
            if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
                if(skip_check(s, s->input_picture[0], s->next_picture_ptr)){
//av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);

                    if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
                        for(i=0; i<4; i++)
                            s->input_picture[0]->data[i]= NULL;
                        s->input_picture[0]->type= 0;
                    }else{
                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER 
                               || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

                        s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
                    }

                    goto no_output_pic;
                }
            }

            /* two-pass mode: take the picture types recorded in the first pass */
            if(s->flags&CODEC_FLAG_PASS2){
                for(i=0; i<s->max_b_frames+1; i++){
                    int pict_num= s->input_picture[0]->display_picture_number + i;

                    if(pict_num >= s->rc_context.num_entries) 
                        break;
                    if(!s->input_picture[i]){
                        s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
                        break;
                    }

                    s->input_picture[i]->pict_type= 
                        s->rc_context.entry[pict_num].new_pict_type;
                }
            }

            /* decide how many B frames to insert before the next P frame */
            if(s->avctx->b_frame_strategy==0){
                b_frames= s->max_b_frames;
                while(b_frames && !s->input_picture[b_frames]) b_frames--;
            }else if(s->avctx->b_frame_strategy==1){
                /* adaptive: score each candidate by how intra-looking it is */
                for(i=1; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
                        s->input_picture[i]->b_frame_score= 
                            get_intra_count(s, s->input_picture[i  ]->data[0], 
                                               s->input_picture[i-1]->data[0], s->linesize) + 1;
                    }
                }
                for(i=0; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
                }

                b_frames= FFMAX(0, i-1);

                /* reset scores */
                for(i=0; i<b_frames+1; i++){
                    s->input_picture[i]->b_frame_score=0;
                }
            }else{
                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
                b_frames=0;
            }

            emms_c();
//static int b_count=0;
//b_count+= b_frames;
//av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);

            /* a user/pass-1 forced non-B type inside the run shortens it */
            for(i= b_frames - 1; i>=0; i--){
                int type= s->input_picture[i]->pict_type;
                if(type && type != B_TYPE)
                    b_frames= i;
            }
            if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
                av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
            }

            /* GOP boundary handling: force an I frame (or cut the B run) */
            if(s->picture_in_gop_number + b_frames >= s->gop_size){
              if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
                    b_frames= s->gop_size - s->picture_in_gop_number - 1;
              }else{
                if(s->flags & CODEC_FLAG_CLOSED_GOP)
                    b_frames=0;
                s->input_picture[b_frames]->pict_type= I_TYPE;
              }
            }

            /* a closed GOP may not have B frames referencing across the I frame */
            if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
               && b_frames
               && s->input_picture[b_frames]->pict_type== I_TYPE)
                b_frames--;

            /* emit the anchor first, then its preceding pictures as B frames */
            s->reordered_input_picture[0]= s->input_picture[b_frames];
            if(s->reordered_input_picture[0]->pict_type != I_TYPE)
                s->reordered_input_picture[0]->pict_type= P_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
            for(i=0; i<b_frames; i++){
                s->reordered_input_picture[i+1]= s->input_picture[i];
                s->reordered_input_picture[i+1]->pict_type= B_TYPE;
                s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
            }
        }
    }
no_output_pic:
    if(s->reordered_input_picture[0]){
        /* only non-B pictures are used as references */
        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;

        copy_picture(&s->new_picture, s->reordered_input_picture[0]);

        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
            // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable

            int i= ff_find_unused_picture(s, 0);
            Picture *pic= &s->picture[i];

            /* mark us unused / free shared pic */
            for(i=0; i<4; i++)
                s->reordered_input_picture[0]->data[i]= NULL;
            s->reordered_input_picture[0]->type= 0;

            pic->reference              = s->reordered_input_picture[0]->reference;

            alloc_picture(s, pic, 0);

            copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);

            s->current_picture_ptr= pic;
        }else{
            // input is not a shared pix -> reuse buffer for current_pix

            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
                   || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

            s->current_picture_ptr= s->reordered_input_picture[0];
            /* skip the 16-pixel left padding of internally allocated pictures */
            for(i=0; i<4; i++){
                s->new_picture.data[i]+=16;
            }
        }
        copy_picture(&s->current_picture, s->current_picture_ptr);

        s->picture_number= s->new_picture.display_picture_number;
//printf("dpn:%d\n", s->picture_number);
    }else{
       memset(&s->new_picture, 0, sizeof(Picture));
    }
}
2302
/**
 * Encodes one video frame.
 * Buffers/reorders the input picture, runs the encoder on the next picture
 * in coding order and writes the resulting bitstream into buf.
 * @param avctx codec context (input must be YUV420P or YUVJ420P)
 * @param buf output buffer for the coded frame
 * @param buf_size size of buf in bytes
 * @param data input AVFrame (cast from void*)
 * @return number of bytes written to buf, or -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
        av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
        return -1;
    }
    
    /* give each slice thread a share of the output buffer proportional to
       the number of macroblock rows it encodes */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + buf_size*start_y/h;
        uint8_t *end  = buf + buf_size*  end_y/h;

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;
    
    /* choose the next picture in coding order (handles B-frame reordering);
       may leave s->new_picture empty while B-frames are being buffered */
    select_input_picture(s);
    
    /* output? */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);

        encode_picture(s, s->picture_number);
        
        /* export per-frame encoding statistics to the codec context */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (s->out_format == FMT_MJPEG)
            mjpeg_picture_trailer(s);
        
        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        /* accumulate per-plane reconstruction error for PSNR reporting */
        for(i=0; i<4; i++){
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* ask the rate controller how many stuffing bytes are needed to
           keep the VBV buffer from overflowing, then append them */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            /* +50 bytes of headroom for the bytes still to be flushed */
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                /* MPEG-1/2: zero bytes before the next start code are ignored */
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4: explicit stuffing start code (0x1C3) followed by 0xFF bytes */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */    
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);
            
            /* vbv_delay is expressed in 90 kHz clock ticks */
            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            /* patch the 16-bit vbv_delay field, which straddles three bytes
               of the already-written picture header (via vbv_delay_ptr) */
            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* no picture to output yet (B-frame delay); nothing may have been written */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    assert((s->frame_bits&7)==0);
    
    return s->frame_bits/8;
}
2423
2424 #endif //CONFIG_ENCODERS
2425
/**
 * Global motion compensation for the 1-warp-point case (MPEG-4 GMC with a
 * single sprite point): applies one translational sprite offset to the
 * luma macroblock and both chroma blocks.
 * @param s codec context
 * @param dest_y destination luma block (16x16)
 * @param dest_cb destination Cb block (8x8)
 * @param dest_cr destination Cr block (8x8)
 * @param ref_picture reference picture planes [Y, Cb, Cr]
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0;

    /* luma: integer part of the sprite offset gives the source position,
       remaining bits are rescaled to 1/16-pel for the gmc1 filter */
    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;   /* clipped to the right edge: drop the fractional part */
    src_y = clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;
    
    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* 17x17 source needed for the 16x16 block plus interpolation margin */
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }
    
    if((motion_x|motion_y)&7){
        /* true sub-pel offset: use the dedicated gmc1 interpolation,
           done as two 8-wide halves */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        int dxy;
        
        /* offset reduces to a half-pel position: cheaper hpel copy */
        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }
    
    if(s->flags&CODEC_FLAG_GRAY) return;

    /* chroma: same procedure at half resolution using the chroma sprite offset */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;   /* remember: Cr needs the same edge emulation as Cb */
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
    
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
    
    return;
}
2511
/**
 * Global motion compensation for the general (affine warp) case:
 * applies the sprite delta matrix to the luma macroblock and both
 * chroma blocks via the dsp gmc primitive.
 * @param s codec context
 * @param dest_y destination luma block (16x16, warped as two 8-wide halves)
 * @param dest_cb destination Cb block (8x8)
 * @param dest_cr destination Cr block (8x8)
 * @param ref_picture reference picture planes [Y, Cb, Cr]
 */
static inline void gmc_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int linesize, uvlinesize;
    const int a= s->sprite_warping_accuracy;
    int ox, oy;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0];

    /* warped source origin for this macroblock:
       offset plus the delta matrix applied to the MB position */
    ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
    oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;

    /* rounding constant (1<<(2a+1)) - no_rounding matches the warp accuracy */
    s->dsp.gmc(dest_y, ptr, linesize, 16,
           ox, 
           oy, 
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1], 
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos, s->v_edge_pos);
    /* right half: advance the origin by 8 columns through the delta matrix */
    s->dsp.gmc(dest_y+8, ptr, linesize, 16,
           ox + s->sprite_delta[0][0]*8, 
           oy + s->sprite_delta[1][0]*8, 
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1], 
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos, s->v_edge_pos);

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* chroma: half-resolution origin with the chroma sprite offset */
    ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
    oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;

    ptr = ref_picture[1];
    s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
           ox, 
           oy, 
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1], 
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos>>1, s->v_edge_pos>>1);
    
    ptr = ref_picture[2];
    s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
           ox, 
           oy, 
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1], 
           a+1, (1<<(2*a+1)) - s->no_rounding,
           s->h_edge_pos>>1, s->v_edge_pos>>1);
}
2567
2568 /**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2570  * @param buf destination buffer
2571  * @param src source buffer
2572  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2573  * @param block_w width of block
2574  * @param block_h height of block
2575  * @param src_x x coordinate of the top left sample of the block in the source buffer
2576  * @param src_y y coordinate of the top left sample of the block in the source buffer
2577  * @param w width of the source buffer
2578  * @param h height of the source buffer
2579  */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* Clamp the block position so it overlaps the source picture by at
     * least one sample in each direction, moving src so it still points
     * at the same picture location. */
    if(src_y >= h){
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    }else if(src_y <= -block_h){
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if(src_x >= w){
        src  += w - 1 - src_x;
        src_x = w - 1;
    }else if(src_x <= -block_w){
        src  += 1 - block_w - src_x;
        src_x = 1 - block_w;
    }

    /* portion of the block that lies inside the source picture */
    start_y = -src_y > 0 ? -src_y : 0;
    start_x = -src_x > 0 ? -src_x : 0;
    end_y   = h - src_y < block_h ? h - src_y : block_h;
    end_x   = w - src_x < block_w ? w - src_x : block_w;

    /* Each destination sample is the source sample at the clamped
     * coordinates: interior samples copy straight through, samples
     * outside the picture replicate the nearest border sample. */
    for(y = 0; y < block_h; y++){
        const int cy = y < start_y ? start_y : (y >= end_y ? end_y - 1 : y);
        for(x = 0; x < block_w; x++){
            const int cx = x < start_x ? start_x : (x >= end_x ? end_x - 1 : x);
            buf[x + y*linesize] = src[cx + cy*linesize];
        }
    }
}
2638
2639 static inline int hpel_motion(MpegEncContext *s, 
2640                                   uint8_t *dest, uint8_t *src,
2641                                   int field_based, int field_select,
2642                                   int src_x, int src_y,
2643                                   int width, int height, int stride,
2644                                   int h_edge_pos, int v_edge_pos,
2645                                   int w, int h, op_pixels_func *pix_op,
2646                                   int motion_x, int motion_y)
2647 {
2648     int dxy;
2649     int emu=0;
2650
2651     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2652     src_x += motion_x >> 1;
2653     src_y += motion_y >> 1;
2654                 
2655     /* WARNING: do no forget half pels */
2656     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2657     if (src_x == width)
2658         dxy &= ~1;
2659     src_y = clip(src_y, -16, height);
2660     if (src_y == height)
2661         dxy &= ~2;
2662     src += src_y * stride + src_x;
2663
2664     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2665         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2666            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2667             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2668                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2669             src= s->edge_emu_buffer;
2670             emu=1;
2671         }
2672     }
2673     if(field_select)
2674         src += s->linesize;
2675     pix_op[dxy](dest, src, stride, h);
2676     return emu;
2677 }
2678
/**
 * Half-pel motion compensation of one block in low-resolution decoding mode:
 * scales the motion vector down by the lowres shift and uses the h264-style
 * chroma MC function for the fractional interpolation.
 * @return 1 if the edge emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s, 
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int s_mask= (2<<lowres)-1;   /* mask of sub-pel bits at this lowres level */
    int emu=0;
    int sx, sy;

    if(s->quarter_sample){
        /* qpel vectors are treated as hpel in lowres mode (approximation) */
        motion_x/=2;
        motion_y/=2;
    }

    /* split the vector into integer position and sub-pel fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);
                
    src += src_y * stride + src_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    /* rescale the fraction to the 1/8-pel range expected by pix_op */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    if(field_select)
        src += s->linesize;
    pix_op[lowres](dest, src, stride, h, sx, sy);
    return emu;
}
2720
2721 /* apply one mpeg motion vector to the three components */
/**
 * Applies one MPEG-style motion vector to all three components (Y, Cb, Cr),
 * handling frame/field prediction and the chroma subsampling of the
 * current output format (H.263, H.261, 420/422/444).
 * @param field_based 1 for field prediction (doubled line stride)
 * @param bottom_field write into the bottom field of dest
 * @param field_select read from the bottom field of the reference
 * @param motion_x,motion_y half-pel motion vector
 * @param h height of the block in luma lines
 */
static always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
    
#if 0    
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* luma: half-pel sub-position and integer source position */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    /* derive the chroma vector/sub-position per output format */
    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* buggy-encoder workaround: different chroma rounding */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* 4:2:0 - chroma subsampled in both directions */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* source area outside the padded picture: emulate the border */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                /* MPEG-1/2 forbids vectors pointing outside the picture */
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);
    
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
    if(s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
#endif
}
2837
/* apply one mpeg motion vector to the three components */
/**
 * Low-resolution variant of mpeg_motion(): scales vectors and block sizes
 * by the lowres shift and interpolates with the h264-style chroma MC
 * functions instead of the half-pel ops.
 */
static always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;                  /* luma half-block size at this lowres */
    const int s_mask= (2<<lowres)-1;               /* sub-pel bits of a vector at this lowres */
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }
    
    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split the vector into integer position and sub-pel fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
    
    /* chroma vector/fraction per output format (cf. mpeg_motion) */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* source area outside the padded picture: emulate the border */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale the fractions to the 1/8-pel range expected by pix_op */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
    
    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
2934
2935 //FIXME move to dsputil, avg variant, 16x16 version
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Blends an 8x8 block from five overlapping predictions (mid, top, left,
 * right, bottom) using fixed OBMC weights that sum to 8 per sample;
 * result is rounded with +4 and shifted down by 3.
 * NOTE(review): the weight table looks like the H.263 Annex F OBMC window -
 * do not reorder these statements, each writes a specific sample position.
 * @param dst destination 8x8 block
 * @param src the five source predictions [mid, top, left, right, bottom]
 * @param stride line size of dst and all sources
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
/* weighted blend of one sample: weights t,l,m,r,b must sum to 8 */
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
/* same weights applied to a 2x2 group of samples */
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);
    
    x=0;
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    x+= stride;
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    x+= stride;
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    x+= 2*stride;
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    x+= 2*stride;
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    x+= stride;
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
}
2986
2987 /* obmc for 1 8x8 luma block */
/**
 * Overlapped block motion compensation for one 8x8 luma block:
 * builds up to five half-pel predictions (mid, top, left, right, bottom)
 * in the scratchpad and blends them with put_obmc().
 * @param mv the five motion vectors; duplicates of the mid vector
 *        reuse the already-computed mid prediction
 */
static inline void obmc_motion(MpegEncContext *s,
                               uint8_t *dest, uint8_t *src,
                               int src_x, int src_y,
                               op_pixels_func *pix_op,
                               int16_t mv[5][2]/* mid top left right bottom*/)
#define MID    0
{
    int i;
    uint8_t *ptr[5];
    
    assert(s->quarter_sample==0);
    
    for(i=0; i<5; i++){
        if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
            /* neighbour vector identical to mid: reuse its prediction */
            ptr[i]= ptr[MID];
        }else{
            /* lay the predictions out 2x2+1 in the scratchpad (8 apart
               horizontally, 8 lines apart vertically) */
            ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
            hpel_motion(s, ptr[i], src, 0, 0,
                        src_x, src_y,
                        s->width, s->height, s->linesize,
                        s->h_edge_pos, s->v_edge_pos,
                        8, 8, pix_op,
                        mv[i][0], mv[i][1]);
        }
    }

    put_obmc(dest, ptr, s->linesize);                
}
3016
/**
 * Quarter-pel motion compensation of one luma block plus half-pel chroma.
 * @param field_based 1 for field prediction (halves the vertical block
 *                    size and doubles the effective line stride)
 * @param bottom_field 1 to write into the bottom field lines of dest
 * @param field_select selects the source field within ref_picture
 * @param ref_picture array[3] of pointers to the reference planes
 * @param pix_op half-pel ops, used here for the chroma planes
 * @param qpix_op quarter-pel ops, used for the luma plane
 * @param motion_x luma MV x component in quarter-pel units
 * @param motion_y luma MV y component in quarter-pel units
 * @param h height of the luma block in lines (16, or 8 per field)
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    /* the fractional MV part selects one of the 16 qpel interpolators */
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    /* derive the chroma MV from the luma MV; the two workaround branches
       reproduce the (buggy) roundings of certain encoders */
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    /* reduce to half-pel chroma precision, keeping a rounding bit */
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the interpolation taps would read outside the picture, first copy
       the source area into the edge buffer with replicated borders */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16 
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based, 
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based, 
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based, 
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        /* the 16-wide field block is predicted as two 8-wide qpel blocks */
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3103
3104 inline int ff_h263_round_chroma(int x){
3105     if (x >= 0)
3106         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3107     else {
3108         x = -x;
3109         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3110     }
3111 }
3112
/**
 * h263 chroma 4mv motion compensation.
 * In 4MV mode a single chroma vector is derived from the sum of the four
 * luma vectors (passed in mx, my) with the special h263 rounding, then the
 * 8x8 Cb and Cr blocks are predicted with half-pel precision.
 */
static inline void chroma_4mv_motion(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     op_pixels_func *pix_op,
                                     int mx, int my){
    int dxy, emu=0, src_x, src_y, offset;
    uint8_t *ptr;
    
    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);
    
    dxy = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    /* clip to the picture and drop the half-pel component at the border */
    src_x = clip(src_x, -8, s->width/2);
    if (src_x == s->width/2)
        dxy &= ~1;
    src_y = clip(src_y, -8, s->height/2);
    if (src_y == s->height/2)
        dxy &= ~2;
    
    offset = (src_y * (s->uvlinesize)) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    /* Cr uses the same offset; reuse the Cb edge decision via the emu flag */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}
3161
/**
 * Lowres variant of chroma_4mv_motion: derives one chroma vector from the
 * summed luma vectors (mx, my) and predicts the downscaled Cb/Cr blocks
 * using the h264 chroma interpolator for the subpel phase.
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;          /* chroma block size at this scale */
    const int s_mask= (2<<lowres)-1;       /* mask for the subpel phase bits */
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;
    
    /* quarter-sample vectors carry one extra precision bit; drop it first */
    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);
    
    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));
    
    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }     
    /* scale the subpel phase to the 1/8-pel range the chroma MC expects */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
          
    /* Cr uses the same offset; the Cb edge decision is reused via emu */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
3211
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion(MpegEncContext *s, 
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture, 
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    /* OBMC: build a 4x4 cache of the 8x8-block MVs of this MB and its
       neighbours, then overlap-blend each of the four luma blocks */
    if(s->obmc && s->pict_type != B_TYPE){
        int16_t mv_cache[4][4][2];
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
        const int mot_stride= s->b8_stride;
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;

        assert(!s->mb_skipped);
                
        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
        /* the bottom neighbour row duplicates the MB's own bottom row
           (NOTE(review): presumably because the row below is not decoded
           yet — confirm against obmc_motion's blending) */
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);

        /* top neighbour: mirror if at the border or the neighbour is intra */
        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
        }else{
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
        }

        /* left neighbour column */
        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
        }else{
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
        }

        /* right neighbour column */
        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
        }else{
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
        }
        
        /* mx/my accumulate the four luma MVs for the chroma prediction */
        mx = 0;
        my = 0;
        for(i=0;i<4;i++) {
            const int x= (i&1)+1;
            const int y= (i>>1)+1;
            /* centre MV plus its four direct neighbours, in the order
               obmc_motion expects */
            int16_t mv[5][2]= {
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
            //FIXME cleanup
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                        pix_op[1],
                        mv);

            mx += mv[0][0];
            my += mv[0][1];
        }
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);

        return;
    }
   
    switch(s->mv_type) {
    case MV_TYPE_16X16:
        if(s->mcsel){
            /* global motion compensation (sprite warping) */
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr, 
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else if(s->mspel){
            /* WMV2 special half-pel mode */
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else
        {
            mpeg_motion(s, dest_y, dest_cb, dest_cr, 
                        0, 0, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }           
        break;
    case MV_TYPE_8X8:
        /* four luma MVs; their sum drives the single chroma MV below */
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
                    
                /* WARNING: do not forget half pels */
                src_x = clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;
                    
                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8 
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                /* halve to half-pel units before summing for chroma */
                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                            ref_picture[0], 0, 0,
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos, s->v_edge_pos,
                            8, 8, pix_op[1],
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            if(s->quarter_sample){
                for(i=0; i<2; i++){
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
                }
            }else{
                /* top field */       
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 0, s->field_select[dir][0],
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 1, s->field_select[dir][1],
                            ref_picture, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        } else {
            /* field picture referencing the opposite parity of the frame
               being decoded: take it from the current picture */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            } 

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_16X8:
        /* two 16x8 partitions of a field picture, each with its own
           field select and MV */
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            } 

            mpeg_motion(s, dest_y, dest_cb, dest_cr, 
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
                
            dest_y += 16*s->linesize;
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
        }        
        break;
    case MV_TYPE_DMV:
        /* dual prime: predict from both parities and average */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                }
                pix_op = s->dsp.avg_pixels_tab; 
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion(s, dest_y, dest_cb, dest_cr, 
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);

                // after put we make avg of the same block
                pix_op=s->dsp.avg_pixels_tab; 

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;    
                }
            }
        }
    break;
    default: assert(0);
    }
}
3460
/**
 * motion compensation of a single macroblock, lowres variant
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s, 
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture, 
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;    /* 8x8 block size at this scale */

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        /* four luma MVs; their sum drives the single chroma MV below */
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */       
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* field picture referencing the opposite parity of the frame
               being decoded: take it from the current picture */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            } 

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        /* two 16x8 partitions of a field picture, each with its own
           field select and MV */
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            } 

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
                
            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }        
        break;
    case MV_TYPE_DMV:
        /* dual prime: predict from both parities and average */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;    
                }
            }
        }
    break;
    default: assert(0);
    }
}
3586
/* put block[] to dest[]: intra-dequantize block i with the given qscale,
   then IDCT it and write (not add) the result to dest */
static inline void put_dct(MpegEncContext *s, 
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
3594
3595 /* add block[] to dest[] */
3596 static inline void add_dct(MpegEncContext *s, 
3597                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3598 {
3599     if (s->block_last_index[i] >= 0) {
3600         s->dsp.idct_add (dest, line_size, block);
3601     }
3602 }
3603
3604 static inline void add_dequant_dct(MpegEncContext *s, 
3605                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3606 {
3607     if (s->block_last_index[i] >= 0) {
3608         s->dct_unquantize_inter(s, block, i, qscale);
3609
3610         s->dsp.idct_add (dest, line_size, block);
3611     }
3612 }
3613
3614 /**
3615  * cleans dc, ac, coded_block for the current non intra MB
3616  */
3617 void ff_clean_intra_table_entries(MpegEncContext *s)
3618 {
3619     int wrap = s->b8_stride;
3620     int xy = s->block_index[0];
3621     
3622     s->dc_val[0][xy           ] = 
3623     s->dc_val[0][xy + 1       ] = 
3624     s->dc_val[0][xy     + wrap] =
3625     s->dc_val[0][xy + 1 + wrap] = 1024;
3626     /* ac pred */
3627     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3628     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3629     if (s->msmpeg4_version>=3) {
3630         s->coded_block[xy           ] =
3631         s->coded_block[xy + 1       ] =
3632         s->coded_block[xy     + wrap] =
3633         s->coded_block[xy + 1 + wrap] = 0;
3634     }
3635     /* chroma */
3636     wrap = s->mb_stride;
3637     xy = s->mb_x + s->mb_y * wrap;
3638     s->dc_val[1][xy] =
3639     s->dc_val[2][xy] = 1024;
3640     /* ac pred */
3641     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3642     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3643     
3644     s->mbintra_table[xy]= 0;
3645 }
3646
3647 /* generic function called after a macroblock has been parsed by the
3648    decoder or after it has been encoded by the encoder.
3649
3650    Important variables used:
3651    s->mb_intra : true if intra macroblock
3652    s->mv_dir   : motion vector direction
3653    s->mv_type  : motion vector type
3654    s->mv       : motion vector
3655    s->interlaced_dct : true if interlaced dct used (mpeg2)
3656  */
3657 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3658 {
3659     int mb_x, mb_y;
3660     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3661 #ifdef HAVE_XVMC
3662     if(s->avctx->xvmc_acceleration){
3663         XVMC_decode_mb(s);//xvmc uses pblocks
3664         return;
3665     }
3666 #endif
3667
3668     mb_x = s->mb_x;
3669     mb_y = s->mb_y;
3670
3671     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3672        /* save DCT coefficients */
3673        int i,j;
3674        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3675        for(i=0; i<6; i++)
3676            for(j=0; j<64; j++)
3677                *dct++ = block[i][s->dsp.idct_permutation[j]];
3678     }
3679
3680     s->current_picture.qscale_table[mb_xy]= s->qscale;
3681
3682     /* update DC predictors for P macroblocks */
3683     if (!s->mb_intra) {
3684         if (s->h263_pred || s->h263_aic) {
3685             if(s->mbintra_table[mb_xy])
3686                 ff_clean_intra_table_entries(s);
3687         } else {
3688             s->last_dc[0] =
3689             s->last_dc[1] =
3690             s->last_dc[2] = 128 << s->intra_dc_precision;
3691         }
3692     }
3693     else if (s->h263_pred || s->h263_aic)
3694         s->mbintra_table[mb_xy]=1;
3695
3696     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3697         uint8_t *dest_y, *dest_cb, *dest_cr;
3698         int dct_linesize, dct_offset;
3699         op_pixels_func (*op_pix)[4];
3700         qpel_mc_func (*op_qpix)[16];
3701         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3702         const int uvlinesize= s->current_picture.linesize[1];
3703         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3704         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3705
3706         /* avoid copy if macroblock skipped in last frame too */
3707         /* skip only during decoding as we might trash the buffers during encoding a bit */
3708         if(!s->encoding){
3709             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3710             const int age= s->current_picture.age;
3711
3712             assert(age);
3713
3714             if (s->mb_skipped) {
3715                 s->mb_skipped= 0;
3716                 assert(s->pict_type!=I_TYPE);
3717  
3718                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3719                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3720
3721                 /* if previous was skipped too, then nothing to do !  */
3722                 if (*mbskip_ptr >= age && s->current_picture.reference){
3723                     return;
3724                 }
3725             } else if(!s->current_picture.reference){
3726                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3727                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3728             } else{
3729                 *mbskip_ptr = 0; /* not skipped */
3730             }
3731         }
3732         
3733         dct_linesize = linesize << s->interlaced_dct;
3734         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3735         
3736         if(readable){
3737             dest_y=  s->dest[0];
3738             dest_cb= s->dest[1];
3739             dest_cr= s->dest[2];
3740         }else{
3741             dest_y = s->b_scratchpad;
3742             dest_cb= s->b_scratchpad+16*linesize;
3743             dest_cr= s->b_scratchpad+32*linesize;
3744         }
3745
3746         if (!s->mb_intra) {
3747             /* motion handling */
3748             /* decoding or more than one mb_type (MC was already done otherwise) */
3749             if(!s->encoding){
3750                 if(lowres_flag){
3751                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3752
3753                     if (s->mv_dir & MV_DIR_FORWARD) {
3754                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3755                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3756                     }
3757                     if (s->mv_dir & MV_DIR_BACKWARD) {
3758                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3759                     }
3760                 }else{
3761                     if ((!s->no_rounding) || s->pict_type==B_TYPE){                
3762                         op_pix = s->dsp.put_pixels_tab;
3763                         op_qpix= s->dsp.put_qpel_pixels_tab;
3764                     }else{
3765                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3766                         op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3767                     }
3768                     if (s->mv_dir & MV_DIR_FORWARD) {
3769                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3770                         op_pix = s->dsp.avg_pixels_tab;
3771                         op_qpix= s->dsp.avg_qpel_pixels_tab;
3772                     }
3773                     if (s->mv_dir & MV_DIR_BACKWARD) {
3774                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3775                     }
3776                 }
3777             }
3778
3779             /* skip dequant / idct if we are really late ;) */
3780             if(s->hurry_up>1) return;
3781
3782             /* add dct residue */
3783             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3784                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3785                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3786                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3787                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3788                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3789
3790                 if(!(s->flags&CODEC_FLAG_GRAY)){
3791                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3792                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3793                 }
3794             } else if(s->codec_id != CODEC_ID_WMV2){
3795                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
3796                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
3797                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
3798                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
3799
3800                 if(!(s->flags&CODEC_FLAG_GRAY)){
3801                     if(s->chroma_y_shift){//Chroma420
3802                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3803                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
3804                     }else{
3805                         //chroma422
3806                         dct_linesize = uvlinesize << s->interlaced_dct;
3807                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3808
3809                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
3810                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
3811                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
3812                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
3813                         if(!s->chroma_x_shift){//Chroma444
3814                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
3815                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
3816                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
3817                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
3818                         }
3819                     }
3820                 }//fi gray
3821             }
3822             else{
3823                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
3824             }
3825         } else {
3826             /* dct only in intra block */
3827             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
3828                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3829                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3830                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3831                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3832
3833                 if(!(s->flags&CODEC_FLAG_GRAY)){
3834                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3835                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3836                 }
3837             }else{
3838                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
3839                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
3840                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
3841                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
3842
3843                 if(!(s->flags&CODEC_FLAG_GRAY)){
3844                     if(s->chroma_y_shift){
3845                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
3846                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
3847                     }else{
3848
3849                         dct_linesize = uvlinesize << s->interlaced_dct;
3850                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3851
3852                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
3853                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
3854                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
3855                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
3856                         if(!s->chroma_x_shift){//Chroma444
3857                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
3858                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
3859                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
3860                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
3861                         }
3862                     }
3863                 }//gray
3864             }
3865         }
3866         if(!readable){
3867             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
3868             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
3869             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
3870         }
3871     }
3872 }
3873
3874 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
3875     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
3876     else                  MPV_decode_mb_internal(s, block, 0);
3877 }
3878
3879 #ifdef CONFIG_ENCODERS
3880
3881 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
3882 {
3883     static const char tab[64]=
3884         {3,2,2,1,1,1,1,1,
3885          1,1,1,1,1,1,1,1,
3886          1,1,1,1,1,1,1,1,
3887          0,0,0,0,0,0,0,0,
3888          0,0,0,0,0,0,0,0,
3889          0,0,0,0,0,0,0,0,
3890          0,0,0,0,0,0,0,0,
3891          0,0,0,0,0,0,0,0};
3892     int score=0;
3893     int run=0;
3894     int i;
3895     DCTELEM *block= s->block[n];
3896     const int last_index= s->block_last_index[n];
3897     int skip_dc;
3898
3899     if(threshold<0){
3900         skip_dc=0;
3901         threshold= -threshold;
3902     }else
3903         skip_dc=1;
3904
3905     /* are all which we could set to zero are allready zero? */
3906     if(last_index<=skip_dc - 1) return;
3907
3908     for(i=0; i<=last_index; i++){
3909         const int j = s->intra_scantable.permutated[i];
3910         const int level = ABS(block[j]);
3911         if(level==1){
3912             if(skip_dc && i==0) continue;
3913             score+= tab[run];
3914             run=0;
3915         }else if(level>1){
3916             return;
3917         }else{
3918             run++;
3919         }
3920     }
3921     if(score >= threshold) return;
3922     for(i=skip_dc; i<=last_index; i++){
3923         const int j = s->intra_scantable.permutated[i];
3924         block[j]=0;
3925     }
3926     if(block[0]) s->block_last_index[n]= 0;
3927     else         s->block_last_index[n]= -1;
3928 }
3929
3930 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
3931 {
3932     int i;
3933     const int maxlevel= s->max_qcoeff;
3934     const int minlevel= s->min_qcoeff;
3935     int overflow=0;
3936     
3937     if(s->mb_intra){
3938         i=1; //skip clipping of intra dc
3939     }else
3940         i=0;
3941     
3942     for(;i<=last_index; i++){
3943         const int j= s->intra_scantable.permutated[i];
3944         int level = block[j];
3945        
3946         if     (level>maxlevel){
3947             level=maxlevel;
3948             overflow++;
3949         }else if(level<minlevel){
3950             level=minlevel;
3951             overflow++;
3952         }
3953         
3954         block[j]= level;
3955     }
3956     
3957     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
3958         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
3959 }
3960
3961 #endif //CONFIG_ENCODERS
3962
3963 /**
3964  *
3965  * @param h is the normal height, this will be reduced automatically if needed for the last row
3966  */
3967 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
3968     if (s->avctx->draw_horiz_band) {
3969         AVFrame *src;
3970         int offset[4];
3971         
3972         if(s->picture_structure != PICT_FRAME){
3973             h <<= 1;
3974             y <<= 1;
3975             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
3976         }
3977
3978         h= FFMIN(h, s->avctx->height - y);
3979
3980         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) 
3981             src= (AVFrame*)s->current_picture_ptr;
3982         else if(s->last_picture_ptr)
3983             src= (AVFrame*)s->last_picture_ptr;
3984         else
3985             return;
3986             
3987         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
3988             offset[0]=
3989             offset[1]=
3990             offset[2]=
3991             offset[3]= 0;
3992         }else{
3993             offset[0]= y * s->linesize;;
3994             offset[1]= 
3995             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
3996             offset[3]= 0;
3997         }
3998
3999         emms_c();
4000
4001         s->avctx->draw_horiz_band(s->avctx, src, offset,
4002                                   y, s->picture_structure, h);
4003     }
4004 }
4005
/**
 * Set up s->block_index[] and s->dest[] for the macroblock at
 * (s->mb_x, s->mb_y).  block_index[0..3] address the four luma 8x8
 * blocks, [4] and [5] the chroma blocks.  s->dest[] is positioned one
 * macroblock to the LEFT of mb_x — presumably callers advance it once
 * per macroblock (cf. the block_index += 2 updates in
 * estimate_motion_thread()) — TODO confirm.
 */
void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
    const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
    const int uvlinesize= s->current_picture.linesize[1];
    const int mb_size= 4 - s->avctx->lowres; // log2 of the macroblock edge: 16 pixels at full resolution
        
    s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
    s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
    s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
    s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
    s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    //block_index is not used by mpeg2, so it is not affected by chroma_format

    s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
    s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
    s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));

    if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
    {
        /* add the row offset here; in the excluded B-frame/draw_horiz_band
           case it is presumably applied elsewhere — TODO confirm where */
        s->dest[0] += s->mb_y *   linesize << mb_size;
        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
    }
}
4030
4031 #ifdef CONFIG_ENCODERS
4032
4033 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4034     int x, y;
4035 //FIXME optimize
4036     for(y=0; y<8; y++){
4037         for(x=0; x<8; x++){
4038             int x2, y2;
4039             int sum=0;
4040             int sqr=0;
4041             int count=0;
4042
4043             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4044                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4045                     int v= ptr[x2 + y2*stride];
4046                     sum += v;
4047                     sqr += v*v;
4048                     count++;
4049                 }
4050             }
4051             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4052         }
4053     }
4054 }
4055
/**
 * Encode one macroblock: set the macroblock qscale (adaptive
 * quantization), fetch the source pixels (intra) or motion-compensate a
 * prediction and form the residual (inter), forward-DCT and quantize the
 * six 8x8 blocks, run the optional coefficient-elimination passes, and
 * finally entropy-code the macroblock with the codec-specific coder.
 * @param motion_x x motion vector component, passed through to the MB coder
 * @param motion_y y motion vector component, passed through to the MB coder
 */
static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
    int16_t weight[6][64];          // perceptual weights for noise shaping
    DCTELEM orig[6][64];            // pre-quantization blocks for noise shaping
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    int i;
    int skip_dct[6];                // per-block "residual negligible" flags
    int dct_offset   = s->linesize*8; //default for progressive frames
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int wrap_y, wrap_c;
    
    for(i=0; i<6; i++) skip_dct[i]=0;
    
    /* adaptive quantization: derive this MB's qscale from its lambda */
    if(s->adaptive_quant){
        const int last_qp= s->qscale;
        const int mb_xy= mb_x + mb_y*s->mb_stride;

        s->lambda= s->lambda_table[mb_xy];
        update_qscale(s);
    
        if(!(s->flags&CODEC_FLAG_QP_RD)){
            s->dquant= s->qscale - last_qp;

            if(s->out_format==FMT_H263){
                /* H.263-family syntax only allows small qscale deltas */
                s->dquant= clip(s->dquant, -2, 2); //FIXME RD
            
                if(s->codec_id==CODEC_ID_MPEG4){        
                    if(!s->mb_intra){
                        if(s->pict_type == B_TYPE){
                            if(s->dquant&1) 
                                s->dquant= (s->dquant/2)*2;
                            if(s->mv_dir&MV_DIRECT)
                                s->dquant= 0;
                        }
                        if(s->mv_type==MV_TYPE_8X8)
                            s->dquant=0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    }else if(s->flags&CODEC_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    /* source pixel pointers for this macroblock */
    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
    ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
    ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;

    /* MB sticks out of the picture: re-read it through the edge
       emulation buffer so all 16x16 / 8x8 accesses stay valid */
    if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
        uint8_t *ebuf= s->edge_emu_buffer + 32;
        ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
        ptr_y= ebuf;
        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cb= ebuf+18*wrap_y;
        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cr= ebuf+18*wrap_y+8;
    }

    if (s->mb_intra) {
        /* decide frame vs. field DCT for interlaced material by comparing
           progressive and interlaced line-pair scores */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8) 
                              +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;

            if(progressive_score > 0){
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8) 
                                  +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;
            
                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }
        
        /* intra: DCT input is the source pixels themselves */
        s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
        }
    }else{
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        if ((!s->no_rounding) || s->pict_type==B_TYPE){
            op_pix = s->dsp.put_pixels_tab;
            op_qpix= s->dsp.put_qpel_pixels_tab;
        }else{
            op_pix = s->dsp.put_no_rnd_pixels_tab;
            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        /* build the motion-compensated prediction into dest_*; the second
           direction averages on top of the first */
        if (s->mv_dir & MV_DIR_FORWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
            op_pix = s->dsp.avg_pixels_tab;
            op_qpix= s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
        }

        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8) 
                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
            
            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;

            if(progressive_score>0){
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8) 
                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
            
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;
            
                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }
        
        /* inter: DCT input is the prediction residual */
        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
        
        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
        }
        /* pre quantization */         
        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
            //FIXME optimize
            if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
        }
    }

    /* noise shaping: keep weights and original coefficients around so the
       quantization can be refined after the first pass */
    if(s->avctx->quantizer_noise_shaping){
        if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
        if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
        if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
        if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
    }
            
    /* DCT & quantize */
    assert(s->out_format!=FMT_MJPEG || s->qscale==8);
    {
        for(i=0;i<6;i++) {
            if(!skip_dct[i]){
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
            // FIXME we could decide to change to quantizer instead of clipping
            // JS: I don't think that would be a good idea it could lower quality instead
            //     of improve it. Just INTRADC clipping deserves changes in quantizer
                if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
            }else
                s->block_last_index[i]= -1;
        }
        if(s->avctx->quantizer_noise_shaping){
            for(i=0;i<6;i++) {
                if(!skip_dct[i]){
                    s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
                }
            }
        }
        
        /* optionally zero blocks whose few |1| coefficients cost more than
           they are worth */
        if(s->luma_elim_threshold && !s->mb_intra)
            for(i=0; i<4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if(s->chroma_elim_threshold && !s->mb_intra)
            for(i=4; i<6; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if(s->flags & CODEC_FLAG_CBP_RD){
            for(i=0;i<6;i++) {
                if(s->block_last_index[i] == -1)
                    s->coded_score[i]= INT_MAX/256;
            }
        }
    }

    /* gray-only intra: force flat chroma DC blocks */
    if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
        s->block_last_index[4]=
        s->block_last_index[5]= 0;
        s->block[4][0]=
        s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
    }

    //non c quantize code returns incorrect block_last_index FIXME
    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
        for(i=0; i<6; i++){
            int j;
            if(s->block_last_index[i]>0){
                for(j=63; j>0; j--){
                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
                }
                s->block_last_index[i]= j;
            }
        }
    }

    /* huffman encode */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case CODEC_ID_MPEG1VIDEO:
    case CODEC_ID_MPEG2VIDEO:
        mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MPEG4:
        mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MSMPEG4V2:
    case CODEC_ID_MSMPEG4V3:
    case CODEC_ID_WMV1:
        msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_WMV2:
         ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
#ifdef CONFIG_H261_ENCODER
    case CODEC_ID_H261:
        ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
#endif
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
    case CODEC_ID_RV10:
    case CODEC_ID_RV20:
        h263_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MJPEG:
        mjpeg_encode_mb(s, s->block); break;
    default:
        assert(0);
    }
}
4317
4318 #endif //CONFIG_ENCODERS
4319
4320 void ff_mpeg_flush(AVCodecContext *avctx){
4321     int i;
4322     MpegEncContext *s = avctx->priv_data;
4323     
4324     if(s==NULL || s->picture==NULL) 
4325         return;
4326     
4327     for(i=0; i<MAX_PICTURE_COUNT; i++){
4328        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4329                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4330         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4331     }
4332     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4333     
4334     s->mb_x= s->mb_y= 0;
4335     
4336     s->parse_context.state= -1;
4337     s->parse_context.frame_start_found= 0;
4338     s->parse_context.overread= 0;
4339     s->parse_context.overread_index= 0;
4340     s->parse_context.index= 0;
4341     s->parse_context.last_index= 0;
4342     s->bitstream_buffer_size=0;
4343 }
4344
4345 #ifdef CONFIG_ENCODERS
4346 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4347 {
4348     const uint16_t *srcw= (uint16_t*)src;
4349     int words= length>>4;
4350     int bits= length&15;
4351     int i;
4352
4353     if(length==0) return;
4354     
4355     if(words < 16){
4356         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4357     }else if(put_bits_count(pb)&7){
4358         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4359     }else{
4360         for(i=0; put_bits_count(pb)&31; i++)
4361             put_bits(pb, 8, src[i]);
4362         flush_put_bits(pb);
4363         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4364         skip_put_bytes(pb, 2*words-i);
4365     }
4366         
4367     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4368 }
4369
/**
 * Copy, from @p s into @p d, the encoder state that encoding a macroblock
 * mutates (DC/MV predictors, skip run, bit statistics), so a trial
 * encoding can start from a clean snapshot (see encode_mb_hq()).
 * @param type macroblock coding type; unused in this function, kept for
 *             symmetry with copy_context_after_encode()
 */
static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
    int i;

    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?

    /* mpeg1 */
    d->mb_skip_run= s->mb_skip_run;
    for(i=0; i<3; i++)
        d->last_dc[i]= s->last_dc[i];
    
    /* statistics */
    d->mv_bits= s->mv_bits;
    d->i_tex_bits= s->i_tex_bits;
    d->p_tex_bits= s->p_tex_bits;
    d->i_count= s->i_count;
    d->f_count= s->f_count;
    d->b_count= s->b_count;
    d->skip_count= s->skip_count;
    d->misc_bits= s->misc_bits;
    d->last_bits= 0;

    d->mb_skipped= 0;
    d->qscale= s->qscale;
    d->dquant= s->dquant;
}
4395
/**
 * Copy, from @p s into @p d, the state produced by encoding a macroblock
 * candidate — motion vectors, predictors, bit statistics, block data and
 * the PutBitContexts — so the best candidate can later be restored
 * (see encode_mb_hq()).
 * @param type macroblock coding type; unused in this function
 */
static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
    int i;

    memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
    
    /* mpeg1 */
    d->mb_skip_run= s->mb_skip_run;
    for(i=0; i<3; i++)
        d->last_dc[i]= s->last_dc[i];
    
    /* statistics */
    d->mv_bits= s->mv_bits;
    d->i_tex_bits= s->i_tex_bits;
    d->p_tex_bits= s->p_tex_bits;
    d->i_count= s->i_count;
    d->f_count= s->f_count;
    d->b_count= s->b_count;
    d->skip_count= s->skip_count;
    d->misc_bits= s->misc_bits;

    d->mb_intra= s->mb_intra;
    d->mb_skipped= s->mb_skipped;
    d->mv_type= s->mv_type;
    d->mv_dir= s->mv_dir;
    d->pb= s->pb;
    if(s->data_partitioning){
        d->pb2= s->pb2;
        d->tex_pb= s->tex_pb;
    }
    d->block= s->block;
    for(i=0; i<6; i++)
        d->block_last_index[i]= s->block_last_index[i];
    d->interlaced_dct= s->interlaced_dct;
    d->qscale= s->qscale;
}
4432
/**
 * Trial-encode the current macroblock with the given coding type and keep
 * the result if it beats the best candidate so far.  Bits go into
 * pb/pb2/tex_pb[*next_block]; with FF_MB_DECISION_RD the candidate is
 * additionally decoded and scored by rate-distortion, otherwise by bit
 * count alone.
 * @param backup     context snapshot to restore encoder state from
 * @param best       receives the winning candidate's state
 * @param dmin       in/out: best score seen so far
 * @param next_block in/out: which of the two candidate buffers to use
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int score;
    uint8_t *dest_backup[3];
    
    copy_context_before_encode(s, backup, type);

    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }
    
    if(*next_block){
        /* second candidate: reconstruct into the scratchpad so the first
           candidate's reconstruction is preserved */
        memcpy(dest_backup, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);
    
    /* rate term: bits written by this candidate */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }
   
    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        /* distortion term: decode the candidate and measure its SSE */
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }
    
    if(*next_block){
        memcpy(s->dest, dest_backup, sizeof(s->dest));
    }

    if(score<*dmin){
        *dmin= score;
        *next_block^=1;

        copy_context_after_encode(best, s, type);
    }
}
4483                 
4484 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4485     uint32_t *sq = squareTbl + 256;
4486     int acc=0;
4487     int x,y;
4488     
4489     if(w==16 && h==16) 
4490         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4491     else if(w==8 && h==8)
4492         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4493     
4494     for(y=0; y<h; y++){
4495         for(x=0; x<w; x++){
4496             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4497         } 
4498     }
4499     
4500     assert(acc>=0);
4501     
4502     return acc;
4503 }
4504
4505 static int sse_mb(MpegEncContext *s){
4506     int w= 16;
4507     int h= 16;
4508
4509     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4510     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4511
4512     if(w==16 && h==16)
4513       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4514         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4515                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4516                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4517       }else{
4518         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4519                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4520                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4521       }
4522     else
4523         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4524                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4525                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4526 }
4527
4528 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4529     MpegEncContext *s= arg;
4530
4531     
4532     s->me.pre_pass=1;
4533     s->me.dia_size= s->avctx->pre_dia_size;
4534     s->first_slice_line=1;
4535     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4536         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4537             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4538         }
4539         s->first_slice_line=0;
4540     }
4541     
4542     s->me.pre_pass=0;
4543     
4544     return 0;
4545 }
4546
4547 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4548     MpegEncContext *s= arg;
4549
4550     s->me.dia_size= s->avctx->dia_size;
4551     s->first_slice_line=1;
4552     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4553         s->mb_x=0; //for block init below
4554         ff_init_block_index(s);
4555         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4556             s->block_index[0]+=2;
4557             s->block_index[1]+=2;
4558             s->block_index[2]+=2;
4559             s->block_index[3]+=2;
4560             
4561             /* compute motion vector & mb_type and store in context */
4562             if(s->pict_type==B_TYPE)
4563                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4564             else
4565                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4566         }
4567         s->first_slice_line=0;
4568     }
4569     return 0;
4570 }
4571
4572 static int mb_var_thread(AVCodecContext *c, void *arg){
4573     MpegEncContext *s= arg;
4574     int mb_x, mb_y;
4575
4576     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4577         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4578             int xx = mb_x * 16;
4579             int yy = mb_y * 16;
4580             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4581             int varc;
4582             int sum = s->dsp.pix_sum(pix, s->linesize);
4583     
4584             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4585
4586             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4587             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4588             s->me.mb_var_sum_temp    += varc;
4589         }
4590     }
4591     return 0;
4592 }
4593
4594 static void write_slice_end(MpegEncContext *s){
4595     if(s->codec_id==CODEC_ID_MPEG4){
4596         if(s->partitioned_frame){
4597             ff_mpeg4_merge_partitions(s);
4598         }
4599     
4600         ff_mpeg4_stuffing(&s->pb);
4601     }else if(s->out_format == FMT_MJPEG){
4602         ff_mjpeg_stuffing(&s->pb);
4603     }
4604
4605     align_put_bits(&s->pb);
4606     flush_put_bits(&s->pb);
4607     
4608     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4609         s->misc_bits+= get_bits_diff(s);
4610 }
4611
/**
 * Encode one slice (macroblock rows start_mb_y..end_mb_y-1) of the current
 * picture. Runs as a per-slice thread worker.
 *
 * For each macroblock it optionally writes a GOB / video-packet / slice
 * header (RTP mode), then either encodes the single candidate macroblock
 * type directly, or — when several candidate types are set, or QP_RD is
 * enabled — tries each candidate with encode_mb_hq() into double-buffered
 * bitstream writers and keeps the rate-distortion cheapest one.
 *
 * @return 0 on success, -1 if the output buffer would overflow.
 */
static int encode_thread(AVCodecContext *c, void *arg){
    MpegEncContext *s= arg;
    int mb_x, mb_y, pdif = 0;
    int i, j;
    MpegEncContext best_s, backup_s;
    uint8_t bit_buf[2][MAX_MB_BYTES];
    uint8_t bit_buf2[2][MAX_MB_BYTES];
    uint8_t bit_buf_tex[2][MAX_MB_BYTES];
    PutBitContext pb[2], pb2[2], tex_pb[2];
//printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);

    /* double-buffered writers: encode_mb_hq() encodes each candidate into
       buffer next_block and flips to the other one when it wins */
    for(i=0; i<2; i++){
        init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
        init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
        init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
    }

    /* reset per-slice bit accounting */
    s->last_bits= put_bits_count(&s->pb);
    s->mv_bits=0;
    s->misc_bits=0;
    s->i_tex_bits=0;
    s->p_tex_bits=0;
    s->i_count=0;
    s->f_count=0;
    s->b_count=0;
    s->skip_count=0;

    for(i=0; i<3; i++){
        /* init last dc values */
        /* note: quant matrix value (8) is implied here */
        s->last_dc[i] = 128 << s->intra_dc_precision;
        
        s->current_picture_ptr->error[i] = 0;
    }
    s->mb_skip_run = 0;
    memset(s->last_mv, 0, sizeof(s->last_mv));
     
    s->last_mv_dir = 0;

    /* per-codec slice setup */
    switch(s->codec_id){
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
        s->gob_index = ff_h263_get_gob_height(s);
        break;
    case CODEC_ID_MPEG4:
        if(s->partitioned_frame)
            ff_mpeg4_init_partitions(s);
        break;
    }

    s->resync_mb_x=0;
    s->resync_mb_y=0; 
    s->first_slice_line = 1;
    s->ptr_lastgob = s->pb.buf;
    for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
//    printf("row %d at %X\n", s->mb_y, (int)s);
        s->mb_x=0;
        s->mb_y= mb_y;

        ff_set_qscale(s, s->qscale);
        ff_init_block_index(s);
        
        for(mb_x=0; mb_x < s->mb_width; mb_x++) {
            int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
            int mb_type= s->mb_type[xy];
//            int d;
            int dmin= INT_MAX;
            int dir;

            /* bail out before the writers can run past the output buffer */
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
                return -1;
            }
            if(s->data_partitioning){
                if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
                   || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
                    av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
                    return -1;
                }
            }

            s->mb_x = mb_x;
            s->mb_y = mb_y;  // moved into loop, can get changed by H.261
            ff_update_block_index(s);

#ifdef CONFIG_H261_ENCODER
            if(s->codec_id == CODEC_ID_H261){
                /* H.261 reorders macroblocks, so recompute xy/mb_type */
                ff_h261_reorder_mb_index(s);
                xy= s->mb_y*s->mb_stride + s->mb_x;
                mb_type= s->mb_type[xy];
            }
#endif

            /* write gob / video packet header  */
            if(s->rtp_mode){
                int current_packet_size, is_gob_start;
                
                current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
                
                /* start a new packet once the payload-size target is hit */
                is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0; 
                
                if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
                
                /* codec-specific constraints on where a packet may start */
                switch(s->codec_id){
                case CODEC_ID_H263:
                case CODEC_ID_H263P:
                    if(!s->h263_slice_structured)
                        if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
                    break;
                case CODEC_ID_MPEG2VIDEO:
                    if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
                    /* fall through: the MPEG-1 skip-run rule applies too */
                case CODEC_ID_MPEG1VIDEO:
                    if(s->mb_skip_run) is_gob_start=0;
                    break;
                }

                if(is_gob_start){
                    /* close the previous packet (unless this is the very
                       first macroblock of the slice) */
                    if(s->start_mb_y != mb_y || mb_x!=0){
                        write_slice_end(s);

                        if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
                            ff_mpeg4_init_partitions(s);
                        }
                    }
                
                    assert((put_bits_count(&s->pb)&7) == 0);
                    current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
                    
                    /* optional deliberate error injection for testing:
                       drop the packet by rewinding the writer */
                    if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
                        int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
                        int d= 100 / s->avctx->error_rate;
                        if(r % d == 0){
                            current_packet_size=0;
#ifndef ALT_BITSTREAM_WRITER
                            s->pb.buf_ptr= s->ptr_lastgob;
#endif
                            assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
                        }
                    }

                    if (s->avctx->rtp_callback){
                        int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
                        s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
                    }
                    
                    /* emit the new packet's header and reset predictors */
                    switch(s->codec_id){
                    case CODEC_ID_MPEG4:
                        ff_mpeg4_encode_video_packet_header(s);
                        ff_mpeg4_clean_buffers(s);
                    break;
                    case CODEC_ID_MPEG1VIDEO:
                    case CODEC_ID_MPEG2VIDEO:
                        ff_mpeg1_encode_slice_header(s);
                        ff_mpeg1_clean_buffers(s);
                    break;
                    case CODEC_ID_H263:
                    case CODEC_ID_H263P:
                        h263_encode_gob_header(s, mb_y);                       
                    break;
                    }

                    if(s->flags&CODEC_FLAG_PASS1){
                        int bits= put_bits_count(&s->pb);
                        s->misc_bits+= bits - s->last_bits;
                        s->last_bits= bits;
                    }
    
                    s->ptr_lastgob += current_packet_size;
                    s->first_slice_line=1;
                    s->resync_mb_x=mb_x;
                    s->resync_mb_y=mb_y;
                }
            }

            /* one full row has been encoded since the last resync point */
            if(  (s->resync_mb_x   == s->mb_x)
               && s->resync_mb_y+1 == s->mb_y){
                s->first_slice_line=0; 
            }

            s->mb_skipped=0;
            s->dquant=0; //only for QP_RD

            if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
                int next_block=0;
                int pb_bits_count, pb2_bits_count, tex_pb_bits_count;

                /* snapshot the context; each candidate below restarts from
                   backup_s and the winner ends up in best_s */
                copy_context_before_encode(&backup_s, s, -1);
                backup_s.pb= s->pb;
                best_s.data_partitioning= s->data_partitioning;
                best_s.partitioned_frame= s->partitioned_frame;
                if(s->data_partitioning){
                    backup_s.pb2= s->pb2;
                    backup_s.tex_pb= s->tex_pb;
                }

                if(mb_type&CANDIDATE_MB_TYPE_INTER){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = s->p_mv_table[xy][0];
                    s->mv[0][0][1] = s->p_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb, 
                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                }
                if(mb_type&CANDIDATE_MB_TYPE_INTER_I){ 
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb, 
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = 0;
                    s->mv[0][0][1] = 0;
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb, 
                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                }
                if(mb_type&CANDIDATE_MB_TYPE_INTER4V){                 
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_8X8;
                    s->mb_intra= 0;
                    for(i=0; i<4; i++){
                        s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
                        s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb, 
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb, 
                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                }
                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[1][0][0] = s->b_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_back_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
                                 &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
                }
                if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb, 
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
                    int mx= s->b_direct_mv_table[xy][0];
                    int my= s->b_direct_mv_table[xy][1];
                    
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
                    s->mb_intra= 0;
                    ff_mpeg4_set_direct_mv(s, mx, my);
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, 
                                 &dmin, &next_block, mx, my);
                }
                if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){ 
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb, 
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){ 
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb, 
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){ 
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(dir=0; dir<2; dir++){
                        for(i=0; i<2; i++){
                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
                        }
                    }
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb, 
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&CANDIDATE_MB_TYPE_INTRA){
                    s->mv_dir = 0;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 1;
                    s->mv[0][0][0] = 0;
                    s->mv[0][0][1] = 0;
                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb, 
                                 &dmin, &next_block, 0, 0);
                    if(s->h263_pred || s->h263_aic){
                        if(best_s.mb_intra)
                            s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
                        else
                            ff_clean_intra_table_entries(s); //old mode?
                    }
                }

                /* QP_RD: additionally search neighboring qscales around the
                   winner (16x16, non-direct only), restoring the DC/AC
                   predictors whenever a trial qscale loses */
                if(s->flags & CODEC_FLAG_QP_RD){
                    if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
                        const int last_qp= backup_s.qscale;
                        int dquant, dir, qp, dc[6];
                        DCTELEM ac[6][16];
                        const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
                        
                        assert(backup_s.dquant == 0);

                        //FIXME intra
                        s->mv_dir= best_s.mv_dir;
                        s->mv_type = MV_TYPE_16X16;
                        s->mb_intra= best_s.mb_intra;
                        s->mv[0][0][0] = best_s.mv[0][0][0];
                        s->mv[0][0][1] = best_s.mv[0][0][1];
                        s->mv[1][0][0] = best_s.mv[1][0][0];
                        s->mv[1][0][1] = best_s.mv[1][0][1];
                        
                        dir= s->pict_type == B_TYPE ? 2 : 1;
                        if(last_qp + dir > s->avctx->qmax) dir= -dir;
                        for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
                            qp= last_qp + dquant;
                            if(qp < s->avctx->qmin || qp > s->avctx->qmax)
                                break;
                            backup_s.dquant= dquant;
                            /* save DC/AC prediction state before the trial */
                            if(s->mb_intra && s->dc_val[0]){
                                for(i=0; i<6; i++){
                                    dc[i]= s->dc_val[0][ s->block_index[i] ];
                                    memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
                                }
                            }

                            encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, 
                                         &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
                            if(best_s.qscale != qp){
                                /* trial lost: restore the predictors */
                                if(s->mb_intra && s->dc_val[0]){
                                    for(i=0; i<6; i++){
                                        s->dc_val[0][ s->block_index[i] ]= dc[i];
                                        memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
                                    }
                                }
                                if(dir > 0 && dquant==dir){
                                    dquant= 0;
                                    dir= -dir;
                                }else
                                    break;
                            }
                        }
                        qp= best_s.qscale;
                        s->current_picture.qscale_table[xy]= qp;
                    }
                }

                /* commit the winning candidate: restore its context and
                   copy its bits from the scratch writers into the real ones */
                copy_context_after_encode(s, &best_s, -1);
                
                pb_bits_count= put_bits_count(&s->pb);
                flush_put_bits(&s->pb);
                ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
                s->pb= backup_s.pb;
                
                if(s->data_partitioning){
                    pb2_bits_count= put_bits_count(&s->pb2);
                    flush_put_bits(&s->pb2);
                    ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
                    s->pb2= backup_s.pb2;
                    
                    tex_pb_bits_count= put_bits_count(&s->tex_pb);
                    flush_put_bits(&s->tex_pb);
                    ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
                    s->tex_pb= backup_s.tex_pb;
                }
                s->last_bits= put_bits_count(&s->pb);
               
                if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
                    ff_h263_update_motion_val(s);
        
                if(next_block==0){ //FIXME 16 vs linesize16
                    s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
                    s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
                    s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
                }

                if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
                    MPV_decode_mb(s, s->block);
            } else {
                int motion_x, motion_y;
                s->mv_type=MV_TYPE_16X16;
                // only one MB-Type possible
                
                switch(mb_type){
                case CANDIDATE_MB_TYPE_INTRA:
                    s->mv_dir = 0;
                    s->mb_intra= 1;
                    motion_x= s->mv[0][0][0] = 0;
                    motion_y= s->mv[0][0][1] = 0;
                    break;
                case CANDIDATE_MB_TYPE_INTER:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mb_intra= 0;
                    motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
                    motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
                    break;
                case CANDIDATE_MB_TYPE_INTER_I:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
                    }
                    motion_x = motion_y = 0;
                    break;
                case CANDIDATE_MB_TYPE_INTER4V:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_8X8;
                    s->mb_intra= 0;
                    for(i=0; i<4; i++){
                        s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
                        s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
                    }
                    motion_x= motion_y= 0;
                    break;
                case CANDIDATE_MB_TYPE_DIRECT:
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
                    s->mb_intra= 0;
                    motion_x=s->b_direct_mv_table[xy][0];
                    motion_y=s->b_direct_mv_table[xy][1];
                    ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
                    break;
                case CANDIDATE_MB_TYPE_BIDIR:
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mb_intra= 0;
                    motion_x=0;
                    motion_y=0;
                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
                    break;
                case CANDIDATE_MB_TYPE_BACKWARD:
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mb_intra= 0;
                    motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
                    motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
                    break;
                case CANDIDATE_MB_TYPE_FORWARD:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mb_intra= 0;
                    motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
                    motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
//                    printf(" %d %d ", motion_x, motion_y);
                    break;
                case CANDIDATE_MB_TYPE_FORWARD_I:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
                    }
                    motion_x=motion_y=0;
                    break;
                case CANDIDATE_MB_TYPE_BACKWARD_I:
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(i=0; i<2; i++){
                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
                    }
                    motion_x=motion_y=0;
                    break;
                case CANDIDATE_MB_TYPE_BIDIR_I:
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_FIELD;
                    s->mb_intra= 0;
                    for(dir=0; dir<2; dir++){
                        for(i=0; i<2; i++){
                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
                        }
                    }
                    motion_x=motion_y=0;
                    break;
                default:
                    motion_x=motion_y=0; //gcc warning fix
                    av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
                }

                encode_mb(s, motion_x, motion_y);

                // RAL: Update last macroblock type
                s->last_mv_dir = s->mv_dir;
            
                if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
                    ff_h263_update_motion_val(s);
                
                MPV_decode_mb(s, s->block);
            }

            /* clean the MV table in IPS frames for direct mode in B frames */
            if(s->mb_intra /* && I,P,S_TYPE */){
                s->p_mv_table[xy][0]=0;
                s->p_mv_table[xy][1]=0;
            }
            
            /* accumulate the per-plane reconstruction error for PSNR */
            if(s->flags&CODEC_FLAG_PSNR){
                int w= 16;
                int h= 16;

                if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
                if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;

                s->current_picture_ptr->error[0] += sse(
                    s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                    s->dest[0], w, h, s->linesize);
                s->current_picture_ptr->error[1] += sse(
                    s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                    s->dest[1], w>>1, h>>1, s->uvlinesize);
                s->current_picture_ptr->error[2] += sse(
                    s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                    s->dest[2], w>>1, h>>1, s->uvlinesize);
            }
            if(s->loop_filter){
                if(s->out_format == FMT_H263)
                    ff_h263_loop_filter(s);
            }
//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
        }
    }

    //not beautiful here but we must write it before flushing so it has to be here
    if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
        msmpeg4_encode_ext_header(s);

    write_slice_end(s);

    /* Send the last GOB if RTP */    
    if (s->avctx->rtp_callback) {
        int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
        pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
        /* Call the RTP callback to send the last GOB */
        emms_c();
        s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
    }

    return 0;
}
5194
5195 #define MERGE(field) dst->field += src->field; src->field=0
5196 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5197     MERGE(me.scene_change_score);
5198     MERGE(me.mc_mb_var_sum_temp);
5199     MERGE(me.mb_var_sum_temp);
5200 }
5201
5202 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5203     int i;
5204
5205     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5206     MERGE(dct_count[1]);
5207     MERGE(mv_bits);
5208     MERGE(i_tex_bits);
5209     MERGE(p_tex_bits);
5210     MERGE(i_count);
5211     MERGE(f_count);
5212     MERGE(b_count);
5213     MERGE(skip_count);
5214     MERGE(misc_bits);
5215     MERGE(error_count);
5216     MERGE(padding_bug_score);
5217
5218     if(dst->avctx->noise_reduction){
5219         for(i=0; i<64; i++){
5220             MERGE(dct_error_sum[0][i]);
5221             MERGE(dct_error_sum[1][i]);
5222         }
5223     }
5224     
5225     assert(put_bits_count(&src->pb) % 8 ==0);
5226     assert(put_bits_count(&dst->pb) % 8 ==0);
5227     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5228     flush_put_bits(&dst->pb);
5229 }
5230
/**
 * Encode one complete picture.
 *
 * Pipeline: duplicate the context into the slice threads, run motion
 * estimation (or intra complexity analysis for I frames), merge the
 * per-thread ME statistics, detect scene changes, pick f_code/b_code and
 * clip over-long motion vectors, run rate control / adaptive quantisation,
 * write the format-specific picture header, then encode all macroblocks
 * via encode_thread() and merge the per-thread results.
 *
 * @param s              main encoder context (thread 0)
 * @param picture_number number of the picture being coded
 */
static void encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;

    s->picture_number = picture_number;
    
    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
        
    s->me.scene_change_score=0;
    
//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration
    
    /* rounding control: MSMPEG4 v3+ resets on I frames; H263+/MPEG4 style
       codecs alternate it on every non-B frame (flipflop rounding) */
    if(s->pict_type==I_TYPE){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=B_TYPE){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;          
    }
    
    s->mb_intra=0; //for the rate distortion & bit compare functions
    /* give every slice thread a consistent copy of the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    ff_init_me(s);

    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        /* scale lambda so ME cost compares fairly against texture bits */
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    }else /* if(s->pict_type == I_TYPE) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
        
        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
        }
    }
    /* collect the per-thread ME statistics into the main context */
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* scene change: re-code the P frame as intra-only */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* choose f_code/b_code from the estimated MVs and clip vectors that do
       not fit; skipped for H.263+ unlimited-MV mode (umvplus) */
    if(!s->umvplus){
        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
            }
                    
            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j, 
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==B_TYPE){
            int a, b;

            /* forward (f_code) and backward (b_code) ranges are independent */
            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) 
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, 
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    /* rate control decides the quality for this picture unless CQP */
    if (!s->fixed_qscale) 
        s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr

    if(s->adaptive_quant){
        /* smooth the per-MB qscale table where the codec restricts deltas */
        switch(s->codec_id){
        case CODEC_ID_MPEG4:
            ff_clean_mpeg4_qscales(s);
            break;
        case CODEC_ID_H263:
        case CODEC_ID_H263P:
        case CODEC_ID_FLV1:
            ff_clean_h263_qscales(s);
            break;
        }

        s->lambda= s->lambda_table[0];
        //FIXME broken
    }else
        s->lambda= s->current_picture.quality;
//printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
    update_qscale(s);
    
    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) 
        s->qscale= 3; //reduce clipping problems
        
    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3) & 0xFF;
        }
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }
    
    //FIXME var duplication
    s->current_picture_ptr->key_frame=
    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
    s->current_picture_ptr->pict_type=
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    /* write the format-specific picture header and account its bits */
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        mjpeg_picture_header(s);
        break;
#ifdef CONFIG_H261_ENCODER
    case FMT_H261:
        ff_h261_encode_picture_header(s, picture_number);
        break;
#endif
    case FMT_H263:
        if (s->codec_id == CODEC_ID_WMV2) 
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (s->h263_msmpeg4) 
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
#ifdef CONFIG_RV10_ENCODER
        else if (s->codec_id == CODEC_ID_RV10) 
            rv10_encode_picture_header(s, picture_number);
#endif
#ifdef CONFIG_RV20_ENCODER
        else if (s->codec_id == CODEC_ID_RV20) 
            rv20_encode_picture_header(s, picture_number);
#endif
        else if (s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;
        
    /* propagate the post-ME/ratecontrol state to the slice threads,
       encode all slices, then merge the results back */
    for(i=1; i<s->avctx->thread_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
    for(i=1; i<s->avctx->thread_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
}
5452
5453 #endif //CONFIG_ENCODERS
5454
5455 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5456     const int intra= s->mb_intra;
5457     int i;
5458
5459     s->dct_count[intra]++;
5460
5461     for(i=0; i<64; i++){
5462         int level= block[i];
5463
5464         if(level){
5465             if(level>0){
5466                 s->dct_error_sum[intra][i] += level;
5467                 level -= s->dct_offset[intra][i];
5468                 if(level<0) level=0;
5469             }else{
5470                 s->dct_error_sum[intra][i] -= level;
5471                 level += s->dct_offset[intra][i];
5472                 if(level>0) level=0;
5473             }
5474             block[i]= level;
5475         }
5476     }
5477 }
5478
5479 #ifdef CONFIG_ENCODERS
5480
/**
 * Trellis (rate-distortion optimised) quantisation of one 8x8 block.
 *
 * Runs the forward DCT, then for every coefficient considers up to two
 * candidate quantised levels (nearest, and one step closer to zero) and
 * picks the combination minimising distortion + lambda*bits with a
 * survivor-path dynamic program over run/level pairs.
 *
 * @param s        encoder context
 * @param block    spatial-domain samples on input; quantised coefficients
 *                 (in IDCT-permuted order) on output
 * @param n        block index (< 4 luma, else chroma; selects DC scale)
 * @param qscale   quantiser scale
 * @param overflow set non-zero if a level exceeded s->max_qcoeff
 * @return index of the last non-zero coefficient, or -1 if the block
 *         quantised to all zeros
 */
static int dct_quantize_trellis_c(MpegEncContext *s, 
                        DCTELEM *block, int n,
                        int qscale, int *overflow){
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    int run_tab[65];          // best predecessor run for each position
    int level_tab[65];        // best level for each position
    int score_tab[65];        // best accumulated score ending at position
    int survivor[65];         // positions still worth extending
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;
    int last_i;
    int coeff[2][64];         // up to two candidate levels per coefficient
    int coeff_count[64];      // number of candidates actually stored
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
        
    s->dsp.fdct (block);
    
    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    if (s->mb_intra) {
        int q;
        /* the intra DC coefficient is quantised separately from the ACs */
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }
            
        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    /* find the last coefficient that quantises to a non-zero level */
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    /* build the candidate-level table: for significant coefficients the
       nearest level and one step toward zero, otherwise just +-1 */
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            coeff_count[i]= FFMIN(level, 2);
            assert(coeff_count[i]);
            max |=level;
        }else{
            coeff[0][i]= (level>>31)|1;  /* sign of level, magnitude 1 */
            coeff_count[i]= 1;
        }
    }
    
    *overflow= s->max_qcoeff < max; //overflow might have happened
    
    /* all-zero AC block: nothing to optimise */
    if(last_non_zero < start_i){
        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
        return last_non_zero;
    }

    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;
    
    /* dynamic program: extend every surviving path with each candidate
       level for coefficient i, tracking the cheapest way to reach i+1 */
    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j;
        const int dct_coeff= ABS(block[ scantable[i] ]);
        const int zero_distoration= dct_coeff*dct_coeff;
        int best_score=256*256*256*120;
        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distoration;
            int level= coeff[level_index][i];
            const int alevel= ABS(level);
            int unquant_coeff;
            
            assert(level);

            /* reconstruct the level exactly as the decoder would, so the
               distortion term matches real reconstruction error */
            if(s->out_format == FMT_H263){
                unquant_coeff= alevel*qmul + qadd;
            }else{ //MPEG1
                j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
            level+=64;
            if((level&(~127)) == 0){
                /* level fits the VLC table: rate comes from the table */
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];
                    
                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                    /* also try ending the block here (uses the "last" VLC) */
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                /* level outside the VLC table: costed as an escape code */
                distoration += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distoration + score_tab[i-run];
                    
                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distoration + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }
        
        score_tab[i+1]= best_score;

        //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
        /* prune survivors that can no longer be on an optimal path */
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    /* non-H263 formats have no "last" VLC; choose the cheapest stopping
       point explicitly */
    if(s->out_format != FMT_H263){
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;
    
    dc= ABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
    
    if(last_non_zero < start_i)
        return last_non_zero;

    /* special case: only the first (inter DC) coefficient survives —
       re-evaluate its candidates against the raw DC value directly */
    if(last_non_zero == 0 && start_i == 0){
        int best_level= 0;
        int best_score= dc * dc;
        
        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= ABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    /* backtrack the winning path, writing levels into the block in
       IDCT-permuted order */
    i= last_i;
    assert(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;
    
    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
5768
5769 //#define REFINE_STATS 1
5770 static int16_t basis[64][64];
5771
5772 static void build_basis(uint8_t *perm){
5773     int i, j, x, y;
5774     emms_c();
5775     for(i=0; i<8; i++){
5776         for(j=0; j<8; j++){
5777             for(y=0; y<8; y++){
5778                 for(x=0; x<8; x++){
5779                     double s= 0.25*(1<<BASIS_SHIFT);
5780                     int index= 8*i + j;
5781                     int perm_index= perm[index];
5782                     if(i==0) s*= sqrt(0.5);
5783                     if(j==0) s*= sqrt(0.5);
5784                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5785                 }
5786             }
5787         }
5788     }
5789 }
5790
5791 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5792                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5793                         int n, int qscale){
5794     int16_t rem[64];
5795     DCTELEM d1[64] __align16;
5796     const int *qmat;
5797     const uint8_t *scantable= s->intra_scantable.scantable;
5798     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5799 //    unsigned int threshold1, threshold2;
5800 //    int bias=0;
5801     int run_tab[65];
5802     int prev_run=0;
5803     int prev_level=0;
5804     int qmul, qadd, start_i, last_non_zero, i, dc;
5805     uint8_t * length;
5806     uint8_t * last_length;
5807     int lambda;
5808     int rle_index, run, q, sum;
5809 #ifdef REFINE_STATS
5810 static int count=0;
5811 static int after_last=0;
5812 static int to_zero=0;
5813 static int from_zero=0;
5814 static int raise=0;
5815 static int lower=0;
5816 static int messed_sign=0;
5817 #endif
5818
5819     if(basis[0][0] == 0)
5820         build_basis(s->dsp.idct_permutation);
5821     
5822     qmul= qscale*2;
5823     qadd= (qscale-1)|1;
5824     if (s->mb_intra) {
5825         if (!s->h263_aic) {
5826             if (n < 4)
5827                 q = s->y_dc_scale;
5828             else
5829                 q = s->c_dc_scale;
5830         } else{
5831             /* For AIC we skip quant/dequant of INTRADC */
5832             q = 1;
5833             qadd=0;
5834         }
5835         q <<= RECON_SHIFT-3;
5836         /* note: block[0] is assumed to be positive */
5837         dc= block[0]*q;
5838 //        block[0] = (block[0] + (q >> 1)) / q;
5839         start_i = 1;
5840         qmat = s->q_intra_matrix[qscale];
5841 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5842 //            bias= 1<<(QMAT_SHIFT-1);
5843         length     = s->intra_ac_vlc_length;
5844         last_length= s->intra_ac_vlc_last_length;
5845     } else {
5846         dc= 0;
5847         start_i = 0;
5848         qmat = s->q_inter_matrix[qscale];
5849         length     = s->inter_ac_vlc_length;
5850         last_length= s->inter_ac_vlc_last_length;
5851     }
5852     last_non_zero = s->block_last_index[n];
5853
5854 #ifdef REFINE_STATS
5855 {START_TIMER
5856 #endif
5857     dc += (1<<(RECON_SHIFT-1));
5858     for(i=0; i<64; i++){
5859         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
5860     }
5861 #ifdef REFINE_STATS
5862 STOP_TIMER("memset rem[]")}
5863 #endif
5864     sum=0;
5865     for(i=0; i<64; i++){
5866         int one= 36;
5867         int qns=4;
5868         int w;
5869
5870         w= ABS(weight[i]) + qns*one;
5871         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
5872
5873         weight[i] = w;
5874 //        w=weight[i] = (63*qns + (w/2)) / w;
5875          
5876         assert(w>0);
5877         assert(w<(1<<6));
5878         sum += w*w;
5879     }
5880     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
5881 #ifdef REFINE_STATS
5882 {START_TIMER
5883 #endif
5884     run=0;
5885     rle_index=0;
5886     for(i=start_i; i<=last_non_zero; i++){
5887         int j= perm_scantable[i];
5888         const int level= block[j];
5889         int coeff;
5890         
5891         if(level){
5892             if(level<0) coeff= qmul*level - qadd;
5893             else        coeff= qmul*level + qadd;
5894             run_tab[rle_index++]=run;
5895             run=0;
5896
5897             s->dsp.add_8x8basis(rem, basis[j], coeff);
5898         }else{
5899             run++;
5900         }
5901     }
5902 #ifdef REFINE_STATS
5903 if(last_non_zero>0){
5904 STOP_TIMER("init rem[]")
5905 }
5906 }
5907
5908 {START_TIMER
5909 #endif
5910     for(;;){
5911         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
5912         int best_coeff=0;
5913         int best_change=0;
5914         int run2, best_unquant_change=0, analyze_gradient;
5915 #ifdef REFINE_STATS
5916 {START_TIMER
5917 #endif
5918         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
5919
5920         if(analyze_gradient){
5921 #ifdef REFINE_STATS
5922 {START_TIMER
5923 #endif
5924             for(i=0; i<64; i++){
5925                 int w= weight[i];
5926             
5927                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
5928             }
5929 #ifdef REFINE_STATS
5930 STOP_TIMER("rem*w*w")}
5931 {START_TIMER
5932 #endif
5933             s->dsp.fdct(d1);
5934 #ifdef REFINE_STATS
5935 STOP_TIMER("dct")}
5936 #endif
5937         }
5938
5939         if(start_i){
5940             const int level= block[0];
5941             int change, old_coeff;
5942
5943             assert(s->mb_intra);
5944             
5945             old_coeff= q*level;
5946             
5947             for(change=-1; change<=1; change+=2){
5948                 int new_level= level + change;
5949                 int score, new_coeff;
5950                 
5951                 new_coeff= q*new_level;
5952                 if(new_coeff >= 2048 || new_coeff < 0)
5953                     continue;
5954
5955                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
5956                 if(score<best_score){
5957                     best_score= score;
5958                     best_coeff= 0;
5959                     best_change= change;
5960                     best_unquant_change= new_coeff - old_coeff;
5961                 }
5962             }
5963         }
5964         
5965         run=0;
5966         rle_index=0;
5967         run2= run_tab[rle_index++];
5968         prev_level=0;
5969         prev_run=0;
5970
5971         for(i=start_i; i<64; i++){
5972             int j= perm_scantable[i];
5973             const int level= block[j];
5974             int change, old_coeff;
5975
5976             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
5977                 break;
5978
5979             if(level){
5980                 if(level<0) old_coeff= qmul*level - qadd;
5981                 else        old_coeff= qmul*level + qadd;
5982                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
5983             }else{
5984                 old_coeff=0;
5985                 run2--;
5986                 assert(run2>=0 || i >= last_non_zero );
5987             }
5988             
5989             for(change=-1; change<=1; change+=2){
5990                 int new_level= level + change;
5991                 int score, new_coeff, unquant_change;
5992                 
5993                 score=0;
5994                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
5995                    continue;
5996
5997                 if(new_level){
5998                     if(new_level<0) new_coeff= qmul*new_level - qadd;
5999                     else            new_coeff= qmul*new_level + qadd;
6000                     if(new_coeff >= 2048 || new_coeff <= -2048)
6001                         continue;
6002                     //FIXME check for overflow
6003                     
6004                     if(level){
6005                         if(level < 63 && level > -63){
6006                             if(i < last_non_zero)
6007                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6008                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6009                             else
6010                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6011                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6012                         }
6013                     }else{
6014                         assert(ABS(new_level)==1);
6015                         
6016                         if(analyze_gradient){
6017                             int g= d1[ scantable[i] ];
6018                             if(g && (g^new_level) >= 0)
6019                                 continue;
6020                         }
6021
6022                         if(i < last_non_zero){
6023                             int next_i= i + run2 + 1;
6024                             int next_level= block[ perm_scantable[next_i] ] + 64;
6025                             
6026                             if(next_level&(~127))
6027                                 next_level= 0;
6028
6029                             if(next_i < last_non_zero)
6030                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6031                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6032                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6033                             else
6034                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6035                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6036                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6037                         }else{
6038                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6039                             if(prev_level){
6040                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6041                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6042                             }
6043                         }
6044                     }
6045                 }else{
6046                     new_coeff=0;
6047                     assert(ABS(level)==1);
6048
6049                     if(i < last_non_zero){
6050                         int next_i= i + run2 + 1;
6051                         int next_level= block[ perm_scantable[next_i] ] + 64;
6052                             
6053                         if(next_level&(~127))
6054                             next_level= 0;
6055
6056                         if(next_i < last_non_zero)
6057                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6058                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6059                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6060                         else
6061                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6062                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6063                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6064                     }else{
6065                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6066                         if(prev_level){
6067                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6068                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6069                         }
6070                     }
6071                 }
6072                 
6073                 score *= lambda;
6074
6075                 unquant_change= new_coeff - old_coeff;
6076                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6077                 
6078                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6079                 if(score<best_score){
6080                     best_score= score;
6081                     best_coeff= i;
6082                     best_change= change;
6083                     best_unquant_change= unquant_change;
6084                 }
6085             }
6086             if(level){
6087                 prev_level= level + 64;
6088                 if(prev_level&(~127))
6089                     prev_level= 0;
6090                 prev_run= run;
6091                 run=0;
6092             }else{
6093                 run++;
6094             }
6095         }
6096 #ifdef REFINE_STATS
6097 STOP_TIMER("iterative step")}
6098 #endif
6099
6100         if(best_change){
6101             int j= perm_scantable[ best_coeff ];
6102             
6103             block[j] += best_change;
6104             
6105             if(best_coeff > last_non_zero){
6106                 last_non_zero= best_coeff;
6107                 assert(block[j]);
6108 #ifdef REFINE_STATS
6109 after_last++;
6110 #endif
6111             }else{
6112 #ifdef REFINE_STATS
6113 if(block[j]){
6114     if(block[j] - best_change){
6115         if(ABS(block[j]) > ABS(block[j] - best_change)){
6116             raise++;
6117         }else{
6118             lower++;
6119         }
6120     }else{
6121         from_zero++;
6122     }
6123 }else{
6124     to_zero++;
6125 }
6126 #endif
6127                 for(; last_non_zero>=start_i; last_non_zero--){
6128                     if(block[perm_scantable[last_non_zero]])
6129                         break;
6130                 }
6131             }
6132 #ifdef REFINE_STATS
6133 count++;
6134 if(256*256*256*64 % count == 0){
6135     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6136 }
6137 #endif
6138             run=0;
6139             rle_index=0;
6140             for(i=start_i; i<=last_non_zero; i++){
6141                 int j= perm_scantable[i];
6142                 const int level= block[j];
6143         
6144                  if(level){
6145                      run_tab[rle_index++]=run;
6146                      run=0;
6147                  }else{
6148                      run++;
6149                  }
6150             }
6151             
6152             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6153         }else{
6154             break;
6155         }
6156     }
6157 #ifdef REFINE_STATS
6158 if(last_non_zero>0){
6159 STOP_TIMER("iterative search")
6160 }
6161 }
6162 #endif
6163
6164     return last_non_zero;
6165 }
6166
/**
 * Quantize a block of DCT coefficients (generic C implementation).
 *
 * Applies the forward DCT to the block, optionally runs DCT-domain
 * denoising, then quantizes each coefficient against the per-qscale
 * intra or inter matrix, zeroing everything inside the dead zone.
 *
 * @param s        codec context; supplies the DSP routines, quantizer
 *                 matrices, DC scales and bias values used here
 * @param block    on entry the raw samples fed to the FDCT, on return the
 *                 quantized coefficients (permuted for the IDCT if it
 *                 needs a permutation)
 * @param n        block number; n < 4 selects the luma DC scale,
 *                 otherwise the chroma DC scale is used
 * @param qscale   quantizer scale, indexes the precomputed matrices
 * @param overflow set if the quantized levels may exceed s->max_qcoeff
 *                 (conservative test, see the OR accumulation below)
 * @return scan-order index of the last non-zero coefficient
 */
static int dct_quantize_c(MpegEncContext *s, 
                        DCTELEM *block, int n,
                        int qscale, int *overflow)
{
    int i, j, level, last_non_zero, q, start_i;
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    int bias;
    int max=0;
    unsigned int threshold1, threshold2;

    s->dsp.fdct (block);

    /* optional DCT-domain noise shaping, active when error sums are kept */
    if(s->dct_error_sum)
        s->denoise_dct(s, block);

    if (s->mb_intra) {
        /* intra: the DC coefficient is quantized separately, with its own
           (luma or chroma) scale factor */
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            
        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    }
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);
    /* backward scan: find the last coefficient that survives quantization
       and clear everything after it, so the forward pass below only has to
       visit start_i..last_non_zero */
    for(i=63;i>=start_i;i--) {
        j = scantable[i];
        level = block[j] * qmat[j];

        /* branch-free dead-zone test: the unsigned compare is true iff
           level > threshold1 || level < -threshold1 */
        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }else{
            block[j]=0;
        }
    }
    for(i=start_i; i<=last_non_zero; i++) {
        j = scantable[i];
        level = block[j] * qmat[j];

//        if(   bias+level >= (1<<QMAT_SHIFT)
//           || bias-level >= (1<<QMAT_SHIFT)){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                block[j]= level;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                block[j]= -level;
            }
            /* OR the quantized magnitudes together: cheap, conservative
               overflow detector (may over-trigger, never under-triggers) */
            max |=level;
        }else{
            block[j]=0;
        }
    }
    *overflow= s->max_qcoeff < max; //overflow might have happened
    
    /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
    if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
        ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);

    return last_non_zero;
}
6246
6247 #endif //CONFIG_ENCODERS
6248
6249 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
6250                                    DCTELEM *block, int n, int qscale)
6251 {
6252     int i, level, nCoeffs;
6253     const uint16_t *quant_matrix;
6254
6255     nCoeffs= s->block_last_index[n];
6256     
6257     if (n < 4) 
6258         block[0] = block[0] * s->y_dc_scale;
6259     else
6260         block[0] = block[0] * s->c_dc_scale;
6261     /* XXX: only mpeg1 */
6262     quant_matrix = s->intra_matrix;
6263     for(i=1;i<=nCoeffs;i++) {
6264         int j= s->intra_scantable.permutated[i];
6265         level = block[j];
6266         if (level) {
6267             if (level < 0) {
6268                 level = -level;
6269                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6270                 level = (level - 1) | 1;
6271                 level = -level;
6272             } else {
6273                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6274                 level = (level - 1) | 1;
6275             }
6276             block[j] = level;
6277         }
6278     }
6279 }
6280
6281 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
6282                                    DCTELEM *block, int n, int qscale)
6283 {
6284     int i, level, nCoeffs;
6285     const uint16_t *quant_matrix;
6286
6287     nCoeffs= s->block_last_index[n];
6288     
6289     quant_matrix = s->inter_matrix;
6290     for(i=0; i<=nCoeffs; i++) {
6291         int j= s->intra_scantable.permutated[i];
6292         level = block[j];
6293         if (level) {
6294             if (level < 0) {
6295                 level = -level;
6296                 level = (((level << 1) + 1) * qscale *
6297                          ((int) (quant_matrix[j]))) >> 4;
6298                 level = (level - 1) | 1;
6299                 level = -level;
6300             } else {
6301                 level = (((level << 1) + 1) * qscale *
6302                          ((int) (quant_matrix[j]))) >> 4;
6303                 level = (level - 1) | 1;
6304             }
6305             block[j] = level;
6306         }
6307     }
6308 }
6309
6310 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, 
6311                                    DCTELEM *block, int n, int qscale)
6312 {
6313     int i, level, nCoeffs;
6314     const uint16_t *quant_matrix;
6315
6316     if(s->alternate_scan) nCoeffs= 63;
6317     else nCoeffs= s->block_last_index[n];
6318     
6319     if (n < 4) 
6320         block[0] = block[0] * s->y_dc_scale;
6321     else
6322         block[0] = block[0] * s->c_dc_scale;
6323     quant_matrix = s->intra_matrix;
6324     for(i=1;i<=nCoeffs;i++) {
6325         int j= s->intra_scantable.permutated[i];
6326         level = block[j];
6327         if (level) {
6328             if (level < 0) {
6329                 level = -level;
6330                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6331                 level = -level;
6332             } else {
6333                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6334             }
6335             block[j] = level;
6336         }
6337     }
6338 }
6339
6340 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, 
6341                                    DCTELEM *block, int n, int qscale)
6342 {
6343     int i, level, nCoeffs;
6344     const uint16_t *quant_matrix;
6345     int sum=-1;
6346
6347     if(s->alternate_scan) nCoeffs= 63;
6348     else nCoeffs= s->block_last_index[n];
6349     
6350     quant_matrix = s->inter_matrix;
6351     for(i=0; i<=nCoeffs; i++) {
6352         int j= s->intra_scantable.permutated[i];
6353         level = block[j];
6354         if (level) {
6355             if (level < 0) {
6356                 level = -level;
6357                 level = (((level << 1) + 1) * qscale *
6358                          ((int) (quant_matrix[j]))) >> 4;
6359                 level = -level;
6360             } else {
6361                 level = (((level << 1) + 1) * qscale *
6362                          ((int) (quant_matrix[j]))) >> 4;
6363             }
6364             block[j] = level;
6365             sum+=level;
6366         }
6367     }
6368     block[63]^=sum&1;
6369 }
6370
6371 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
6372                                   DCTELEM *block, int n, int qscale)
6373 {
6374     int i, level, qmul, qadd;
6375     int nCoeffs;
6376     
6377     assert(s->block_last_index[n]>=0);
6378     
6379     qmul = qscale << 1;
6380     
6381     if (!s->h263_aic) {
6382         if (n < 4) 
6383             block[0] = block[0] * s->y_dc_scale;
6384         else
6385             block[0] = block[0] * s->c_dc_scale;
6386         qadd = (qscale - 1) | 1;
6387     }else{
6388         qadd = 0;
6389     }
6390     if(s->ac_pred)
6391         nCoeffs=63;
6392     else
6393         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6394
6395     for(i=1; i<=nCoeffs; i++) {
6396         level = block[i];
6397         if (level) {
6398             if (level < 0) {
6399                 level = level * qmul - qadd;
6400             } else {
6401                 level = level * qmul + qadd;
6402             }
6403             block[i] = level;
6404         }
6405     }
6406 }
6407
6408 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
6409                                   DCTELEM *block, int n, int qscale)
6410 {
6411     int i, level, qmul, qadd;
6412     int nCoeffs;
6413     
6414     assert(s->block_last_index[n]>=0);
6415     
6416     qadd = (qscale - 1) | 1;
6417     qmul = qscale << 1;
6418     
6419     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6420
6421     for(i=0; i<=nCoeffs; i++) {
6422         level = block[i];
6423         if (level) {
6424             if (level < 0) {
6425                 level = level * qmul - qadd;
6426             } else {
6427                 level = level * qmul + qadd;
6428             }
6429             block[i] = level;
6430         }
6431     }
6432 }
6433
#ifdef CONFIG_ENCODERS
/*
 * Encoder registrations.  Every encoder below reuses the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points with
 * an MpegEncContext as private data.  The leading positional fields are
 * codec name, media type, codec id and private-context size, followed by
 * the init/encode/close callbacks (order inferred from the values --
 * confirm against the AVCodec definition in avcodec.h).
 */

/* H.263 encoder */
AVCodec h263_encoder = {
    "h263",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* H.263+ encoder */
AVCodec h263p_encoder = {
    "h263p",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263P,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* FLV1 (Flash Video) encoder */
AVCodec flv_encoder = {
    "flv",
    CODEC_TYPE_VIDEO,
    CODEC_ID_FLV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* RealVideo RV10 encoder */
AVCodec rv10_encoder = {
    "rv10",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV10,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* RealVideo RV20 encoder */
AVCodec rv20_encoder = {
    "rv20",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV20,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* MPEG-4 encoder; CODEC_CAP_DELAY because output may lag behind input
 * (presumably due to B-frame reordering -- see the encode path) */
AVCodec mpeg4_encoder = {
    "mpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG4,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
    .capabilities= CODEC_CAP_DELAY,
};

/* MS-MPEG-4 version 1 encoder */
AVCodec msmpeg4v1_encoder = {
    "msmpeg4v1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* MS-MPEG-4 version 2 encoder */
AVCodec msmpeg4v2_encoder = {
    "msmpeg4v2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* MS-MPEG-4 version 3 encoder (registered under the name "msmpeg4") */
AVCodec msmpeg4v3_encoder = {
    "msmpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V3,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* WMV1 encoder */
AVCodec wmv1_encoder = {
    "wmv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
};

/* Motion-JPEG encoder; note it uses the full-range YUVJ420P pixel format */
AVCodec mjpeg_encoder = {
    "mjpeg",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MJPEG,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
};

#endif //CONFIG_ENCODERS