1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22  
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */ 
27  
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
55                                   DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57 #ifdef CONFIG_ENCODERS
58 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63 #endif //CONFIG_ENCODERS
64
65 #ifdef HAVE_XVMC
66 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67 extern void XVMC_field_end(MpegEncContext *s);
68 extern void XVMC_decode_mb(MpegEncContext *s);
69 #endif
70
71 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72
73
74 /* enable all paranoid tests for rounding, overflows, etc... */
75 //#define PARANOID
76
77 //#define DEBUG
78
79
80 /* for jpeg fast DCT */
81 #define CONST_BITS 14
82
83 static const uint16_t aanscales[64] = {
84     /* precomputed values scaled up by 14 bits */
85     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
86     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
87     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
88     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
89     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
90     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
91     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
92     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
93 };
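/* The table above holds the AAN post-scale factors, i.e. round(2^14 * f(i) * f(j))
 * with f(0) = 1 and f(k) = sqrt(2) * cos(k*pi/16) for k > 0.  A minimal sketch of
 * how such a table could be regenerated (hypothetical helper, not used in this file):
 *
 *     static void fill_aanscales(uint16_t tab[64]){
 *         double f[8];
 *         int i, j;
 *         f[0] = 1.0;
 *         for (i = 1; i < 8; i++)
 *             f[i] = sqrt(2.0) * cos(i * M_PI / 16.0);   // needs <math.h>
 *         for (i = 0; i < 8; i++)
 *             for (j = 0; j < 8; j++)
 *                 tab[i*8 + j] = (uint16_t)(f[i] * f[j] * 16384.0 + 0.5);
 *     }
 *
 * e.g. tab[0] = 16384 and tab[1] = round(1.3870 * 16384) = 22725, matching the values above.
 */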
94
95 static const uint8_t h263_chroma_roundtab[16] = {
96 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
97     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
98 };
99
100 static const uint8_t ff_default_chroma_qscale_table[32]={
101 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
102     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
103 };
104
105 #ifdef CONFIG_ENCODERS
106 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
107 static uint8_t default_fcode_tab[MAX_MV*2+1];
108
109 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
110
111 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
112                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
113 {
114     int qscale;
115     int shift=0;
116
117     for(qscale=qmin; qscale<=qmax; qscale++){
118         int i;
119         if (dsp->fdct == ff_jpeg_fdct_islow 
120 #ifdef FAAN_POSTSCALE
121             || dsp->fdct == ff_faandct
122 #endif
123             ) {
124             for(i=0;i<64;i++) {
125                 const int j= dsp->idct_permutation[i];
126                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
127                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
128                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
129                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
130                 
131                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
132                                 (qscale * quant_matrix[j]));
133             }
134         } else if (dsp->fdct == fdct_ifast
135 #ifndef FAAN_POSTSCALE
136                    || dsp->fdct == ff_faandct
137 #endif
138                    ) {
139             for(i=0;i<64;i++) {
140                 const int j= dsp->idct_permutation[i];
141                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
142                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
143                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
144                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
145                 
146                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
147                                 (aanscales[i] * qscale * quant_matrix[j]));
148             }
149         } else {
150             for(i=0;i<64;i++) {
151                 const int j= dsp->idct_permutation[i];
152                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
153                    So 16           <= qscale * quant_matrix[i]             <= 7905
154                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
155                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
156                 */
157                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
158 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
159                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
160
161                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
162                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
163             }
164         }
165         
166         for(i=intra; i<64; i++){
167             int64_t max= 8191;
168             if (dsp->fdct == fdct_ifast
169 #ifndef FAAN_POSTSCALE
170                    || dsp->fdct == ff_faandct
171 #endif
172                    ) {
173                 max= (8191LL*aanscales[i]) >> 14;
174             }
175             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){ 
176                 shift++;
177             }
178         }
179     }
180     if(shift){
181     av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
182     }
183 }
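/* What convert_matrix() builds, in equation form: each entry is a fixed-point
 * reciprocal of the effective quantizer step, so the quantizer can replace a
 * division by a multiply and a shift:
 *
 *     qmat[qscale][i]  = 2^QMAT_SHIFT / (qscale * quant_matrix[j])
 *     level           ~= (coeff * qmat[qscale][i]) >> QMAT_SHIFT
 *                      = coeff / (qscale * quant_matrix[j])
 *
 * For the ifast/FAAN paths the DCT output still carries the AAN scale
 * aanscales[i]/2^14, which is why those entries divide by
 * (aanscales[i] * qscale * quant_matrix[j]) and shift by QMAT_SHIFT + 14.
 * qmat16[qscale][0][i] is the same reciprocal at QMAT_SHIFT_MMX precision and
 * qmat16[qscale][1][i] holds the corresponding rounding bias.
 */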
184
185 static inline void update_qscale(MpegEncContext *s){
186     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
187     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
188     
189     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
190 }
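/* Worked example for update_qscale(), assuming the usual header values
 * FF_LAMBDA_SHIFT = 7 (so FF_LAMBDA_SCALE = 128) and FF_QP2LAMBDA = 118:
 * the constant 139 is roughly 2^14 / 118, so the mapping approximately
 * inverts lambda = qp * FF_QP2LAMBDA.  For lambda = 472 (QP 4):
 *
 *     qscale = (472*139 + 128*64) >> 14 = (65608 + 8192) >> 14 = 4
 *
 * before being clipped to [qmin, qmax].
 */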
191 #endif //CONFIG_ENCODERS
192
193 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
194     int i;
195     int end;
196     
197     st->scantable= src_scantable;
198
199     for(i=0; i<64; i++){
200         int j;
201         j = src_scantable[i];
202         st->permutated[i] = permutation[j];
203 #ifdef ARCH_POWERPC
204         st->inverse[j] = i;
205 #endif
206     }
207     
208     end=-1;
209     for(i=0; i<64; i++){
210         int j;
211         j = st->permutated[i];
212         if(j>end) end=j;
213         st->raster_end[i]= end;
214     }
215 }
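/* Usage sketch for ff_init_scantable() (illustrative only, identity IDCT
 * permutation assumed):
 *
 *     uint8_t perm[64];
 *     ScanTable st;
 *     int i;
 *     for (i = 0; i < 64; i++)
 *         perm[i] = i;                              // identity permutation
 *     ff_init_scantable(perm, &st, ff_zigzag_direct);
 *     // st.permutated[i] == ff_zigzag_direct[i] here, and st.raster_end[i]
 *     // is the largest permuted index among scan positions 0..i.
 */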
216
217 #ifdef CONFIG_ENCODERS
218 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
219     int i;
220
221     if(matrix){
222         put_bits(pb, 1, 1);
223         for(i=0;i<64;i++) {
224             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
225         }
226     }else
227         put_bits(pb, 1, 0);
228 }
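/* ff_write_quant_matrix() emits either a single '0' bit (keep the default
 * matrix) or a '1' bit followed by the 64 matrix entries as 8-bit values in
 * zigzag order.  The matching read on the decoder side would look roughly
 * like this (sketch, using the usual GetBitContext helpers):
 *
 *     if (get_bits1(&gb)) {
 *         for (i = 0; i < 64; i++)
 *             matrix[ ff_zigzag_direct[i] ] = get_bits(&gb, 8);
 *     } // else: keep the default matrix
 */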
229 #endif //CONFIG_ENCODERS
230
231 /* init common dct for both encoder and decoder */
232 int DCT_common_init(MpegEncContext *s)
233 {
234     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
235     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
236     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
237     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
238     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
239     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
240
241 #ifdef CONFIG_ENCODERS
242     s->dct_quantize= dct_quantize_c;
243     s->denoise_dct= denoise_dct_c;
244 #endif
245         
246 #ifdef HAVE_MMX
247     MPV_common_init_mmx(s);
248 #endif
249 #ifdef ARCH_ALPHA
250     MPV_common_init_axp(s);
251 #endif
252 #ifdef HAVE_MLIB
253     MPV_common_init_mlib(s);
254 #endif
255 #ifdef HAVE_MMI
256     MPV_common_init_mmi(s);
257 #endif
258 #ifdef ARCH_ARMV4L
259     MPV_common_init_armv4l(s);
260 #endif
261 #ifdef ARCH_POWERPC
262     MPV_common_init_ppc(s);
263 #endif
264
265 #ifdef CONFIG_ENCODERS
266     s->fast_dct_quantize= s->dct_quantize;
267
268     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
269         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
270     }
271
272 #endif //CONFIG_ENCODERS
273
274     /* load & permutate scantables
275        note: only wmv uses differnt ones 
276     */
277     if(s->alternate_scan){
278         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
279         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
280     }else{
281         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
282         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
283     }
284     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
285     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
286
287     return 0;
288 }
289
290 static void copy_picture(Picture *dst, Picture *src){
291     *dst = *src;
292     dst->type= FF_BUFFER_TYPE_COPY;
293 }
294
295 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
296     int i;
297
298     dst->pict_type              = src->pict_type;
299     dst->quality                = src->quality;
300     dst->coded_picture_number   = src->coded_picture_number;
301     dst->display_picture_number = src->display_picture_number;
302 //    dst->reference              = src->reference;
303     dst->pts                    = src->pts;
304     dst->interlaced_frame       = src->interlaced_frame;
305     dst->top_field_first        = src->top_field_first;
306
307     if(s->avctx->me_threshold){
308         if(!src->motion_val[0])
309             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
310         if(!src->mb_type)
311             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
312         if(!src->ref_index[0])
313             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
314         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
315             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
316             src->motion_subsample_log2, dst->motion_subsample_log2);
317
318         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
319         
320         for(i=0; i<2; i++){
321             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
322             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
323
324             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
325                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
326             }
327             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
328                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
329             }
330         }
331     }
332 }
333
334 /**
335  * allocates a Picture
336  * The pixels are allocated/set by calling get_buffer() if shared=0
337  */
338 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
339     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't sig11
340     const int mb_array_size= s->mb_stride*s->mb_height;
341     const int b8_array_size= s->b8_stride*s->mb_height*2;
342     const int b4_array_size= s->b4_stride*s->mb_height*4;
343     int i;
344     
345     if(shared){
346         assert(pic->data[0]);
347         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
348         pic->type= FF_BUFFER_TYPE_SHARED;
349     }else{
350         int r;
351         
352         assert(!pic->data[0]);
353         
354         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
355         
356         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
357             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
358             return -1;
359         }
360
361         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
362             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
363             return -1;
364         }
365
366         if(pic->linesize[1] != pic->linesize[2]){
367             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
368             return -1;
369         }
370
371         s->linesize  = pic->linesize[0];
372         s->uvlinesize= pic->linesize[1];
373     }
374     
375     if(pic->qscale_table==NULL){
376         if (s->encoding) {        
377             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
378             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
379             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
380         }
381
382         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
383         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
384         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
385         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
386         if(s->out_format == FMT_H264){
387             for(i=0; i<2; i++){
388                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+2)  * sizeof(int16_t))
389                 pic->motion_val[i]= pic->motion_val_base[i]+2;
390                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
391             }
392             pic->motion_subsample_log2= 2;
393         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
394             for(i=0; i<2; i++){
395                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+2) * sizeof(int16_t))
396                 pic->motion_val[i]= pic->motion_val_base[i]+2;
397                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
398             }
399             pic->motion_subsample_log2= 3;
400         }
401         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
402             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
403         }
404         pic->qstride= s->mb_stride;
405         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
406     }
407
408     //it might be nicer if the application would keep track of these but it would require an API change
409     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
410     s->prev_pict_types[0]= s->pict_type;
411     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
412         pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and it's a bit tricky to skip them anyway
413     
414     return 0;
415 fail: //for the CHECKED_ALLOCZ macro
416     return -1;
417 }
418
419 /**
420  * deallocates a picture
421  */
422 static void free_picture(MpegEncContext *s, Picture *pic){
423     int i;
424
425     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
426         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
427     }
428
429     av_freep(&pic->mb_var);
430     av_freep(&pic->mc_mb_var);
431     av_freep(&pic->mb_mean);
432     av_freep(&pic->mbskip_table);
433     av_freep(&pic->qscale_table);
434     av_freep(&pic->mb_type_base);
435     av_freep(&pic->dct_coeff);
436     av_freep(&pic->pan_scan);
437     pic->mb_type= NULL;
438     for(i=0; i<2; i++){
439         av_freep(&pic->motion_val_base[i]);
440         av_freep(&pic->ref_index[i]);
441     }
442     
443     if(pic->type == FF_BUFFER_TYPE_SHARED){
444         for(i=0; i<4; i++){
445             pic->base[i]=
446             pic->data[i]= NULL;
447         }
448         pic->type= 0;        
449     }
450 }
451
452 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
453     int i;
454
455     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264) 
456     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
457     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
458
459      //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
460     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t)) 
461     s->rd_scratchpad=   s->me.scratchpad;
462     s->b_scratchpad=    s->me.scratchpad;
463     s->obmc_scratchpad= s->me.scratchpad + 16;
464     if (s->encoding) {
465         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
466         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
467         if(s->avctx->noise_reduction){
468             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
469         }
470     }   
471     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
472     s->block= s->blocks[0];
473
474     for(i=0;i<12;i++){
475         s->pblocks[i] = (short *)(&s->block[i]);
476     }
477     return 0;
478 fail:
479     return -1; //free() through MPV_common_end()
480 }
481
482 static void free_duplicate_context(MpegEncContext *s){
483     if(s==NULL) return;
484
485     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
486     av_freep(&s->me.scratchpad);
487     s->rd_scratchpad=   
488     s->b_scratchpad=    
489     s->obmc_scratchpad= NULL;
490     
491     av_freep(&s->dct_error_sum);
492     av_freep(&s->me.map);
493     av_freep(&s->me.score_map);
494     av_freep(&s->blocks);
495     s->block= NULL;
496 }
497
498 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
499 #define COPY(a) bak->a= src->a
500     COPY(allocated_edge_emu_buffer);
501     COPY(edge_emu_buffer);
502     COPY(me.scratchpad);
503     COPY(rd_scratchpad);
504     COPY(b_scratchpad);
505     COPY(obmc_scratchpad);
506     COPY(me.map);
507     COPY(me.score_map);
508     COPY(blocks);
509     COPY(block);
510     COPY(start_mb_y);
511     COPY(end_mb_y);
512     COPY(me.map_generation);
513     COPY(pb);
514     COPY(dct_error_sum);
515     COPY(dct_count[0]);
516     COPY(dct_count[1]);
517 #undef COPY
518 }
519
520 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
521     MpegEncContext bak;
522     int i;
523     //FIXME copy only needed parts
524 //START_TIMER
525     backup_duplicate_context(&bak, dst);
526     memcpy(dst, src, sizeof(MpegEncContext));
527     backup_duplicate_context(dst, &bak);
528     for(i=0;i<12;i++){
529         dst->pblocks[i] = (short *)(&dst->block[i]);
530     }
531 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000 frames on a 1GHz CPU with 2 threads
532 }
533
534 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
535 #define COPY(a) dst->a= src->a
536     COPY(pict_type);
537     COPY(current_picture);
538     COPY(f_code);
539     COPY(b_code);
540     COPY(qscale);
541     COPY(lambda);
542     COPY(lambda2);
543     COPY(picture_in_gop_number);
544     COPY(gop_picture_number);
545     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
546     COPY(progressive_frame); //FIXME don't set in encode_header
547     COPY(partitioned_frame); //FIXME don't set in encode_header
548 #undef COPY
549 }
550
551 /**
552  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
553  * the changed fields will not depend upon the prior state of the MpegEncContext.
554  */
555 static void MPV_common_defaults(MpegEncContext *s){
556     s->y_dc_scale_table=
557     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
558     s->chroma_qscale_table= ff_default_chroma_qscale_table;
559     s->progressive_frame= 1;
560     s->progressive_sequence= 1;
561     s->picture_structure= PICT_FRAME;
562
563     s->coded_picture_number = 0;
564     s->picture_number = 0;
565     s->input_picture_number = 0;
566
567     s->picture_in_gop_number = 0;
568
569     s->f_code = 1;
570     s->b_code = 1;
571 }
572
573 /**
574  * sets the given MpegEncContext to defaults for decoding.
575  * the changed fields will not depend upon the prior state of the MpegEncContext.
576  */
577 void MPV_decode_defaults(MpegEncContext *s){
578     MPV_common_defaults(s);
579 }
580
581 /**
582  * sets the given MpegEncContext to defaults for encoding.
583  * the changed fields will not depend upon the prior state of the MpegEncContext.
584  */
585
586 #ifdef CONFIG_ENCODERS
587 static void MPV_encode_defaults(MpegEncContext *s){
588     static int done=0;
589     
590     MPV_common_defaults(s);
591     
592     if(!done){
593         int i;
594         done=1;
595
596         default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
597         memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
598
599         for(i=-16; i<16; i++){
600             default_fcode_tab[i + MAX_MV]= 1;
601         }
602     }
603     s->me.mv_penalty= default_mv_penalty;
604     s->fcode_tab= default_fcode_tab;
605 }
606 #endif //CONFIG_ENCODERS
607
608 /** 
609  * init common structure for both encoder and decoder.
610  * this assumes that some variables like width/height are already set
611  */
612 int MPV_common_init(MpegEncContext *s)
613 {
614     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
615
616     if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
617         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
618         return -1;
619     }
620
621     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
622         return -1;
623
624     dsputil_init(&s->dsp, s->avctx);
625     DCT_common_init(s);
626
627     s->flags= s->avctx->flags;
628     s->flags2= s->avctx->flags2;
629
630     s->mb_width  = (s->width  + 15) / 16;
631     s->mb_height = (s->height + 15) / 16;
632     s->mb_stride = s->mb_width + 1;
633     s->b8_stride = s->mb_width*2 + 1;
634     s->b4_stride = s->mb_width*4 + 1;
635     mb_array_size= s->mb_height * s->mb_stride;
636     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
637
638     /* set chroma shifts */
639     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
640                                                     &(s->chroma_y_shift) );
641
642     /* set default edge pos, will be overridden in decode_header if needed */
643     s->h_edge_pos= s->mb_width*16;
644     s->v_edge_pos= s->mb_height*16;
645
646     s->mb_num = s->mb_width * s->mb_height;
647     
648     s->block_wrap[0]=
649     s->block_wrap[1]=
650     s->block_wrap[2]=
651     s->block_wrap[3]= s->b8_stride;
652     s->block_wrap[4]=
653     s->block_wrap[5]= s->mb_stride;
654  
655     y_size = s->b8_stride * (2 * s->mb_height + 1);
656     c_size = s->mb_stride * (s->mb_height + 1);
657     yc_size = y_size + 2 * c_size;
658     
659     /* convert fourcc to upper case */
660     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
661                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
662                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
663                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
664
665     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)          
666                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
667                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16) 
668                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
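/* Example of the tag upper-casing above (illustrative): the fourcc is stored
 * with its first character in the least significant byte, so a stream tagged
 * MKTAG('d','i','v','x') == 0x78766964 becomes MKTAG('D','I','V','X') ==
 * 0x58564944, each byte being passed through toupper() independently. */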
669
670     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
671
672     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
673     for(y=0; y<s->mb_height; y++){
674         for(x=0; x<s->mb_width; x++){
675             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
676         }
677     }
678     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
679     
680     if (s->encoding) {
681         /* Allocate MV tables */
682         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
683         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
684         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
685         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
686         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
687         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
688         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
689         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
690         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
691         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
692         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
693         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
694
695         if(s->msmpeg4_version){
696             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
697         }
698         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
699
700         /* Allocate MB type table */
701         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
702         
703         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
704         
705         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
706         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
707         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
708         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
709         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
710         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
711         
712         if(s->avctx->noise_reduction){
713             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
714         }
715     }
716     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
717
718     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
719     
720     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
721         /* interlaced direct mode decoding tables */
722             for(i=0; i<2; i++){
723                 int j, k;
724                 for(j=0; j<2; j++){
725                     for(k=0; k<2; k++){
726                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
727                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
728                     }
729                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
730                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
731                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
732                 }
733                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
734             }
735     }
736     if (s->out_format == FMT_H263) {
737         /* ac values */
738         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
739         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
740         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
741         s->ac_val[2] = s->ac_val[1] + c_size;
742         
743         /* cbp values */
744         CHECKED_ALLOCZ(s->coded_block_base, y_size);
745         s->coded_block= s->coded_block_base + s->b8_stride + 1;
746         
747         /* cbp, ac_pred, pred_dir */
748         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
749         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
750     }
751     
752     if (s->h263_pred || s->h263_plus || !s->encoding) {
753         /* dc values */
754         //MN: we need these for error resilience of intra-frames
755         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
756         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
757         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
758         s->dc_val[2] = s->dc_val[1] + c_size;
759         for(i=0;i<yc_size;i++)
760             s->dc_val_base[i] = 1024;
761     }
762
763     /* which mb is an intra block */
764     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
765     memset(s->mbintra_table, 1, mb_array_size);
766     
767     /* init macroblock skip table */
768     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
769     //Note the +1 is for a quicker mpeg4 slice_end detection
770     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
771     
772     s->parse_context.state= -1;
773     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
774        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
775        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
776        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
777     }
778
779     s->context_initialized = 1;
780
781     s->thread_context[0]= s;
782     for(i=1; i<s->avctx->thread_count; i++){
783         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
784         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
785     }
786
787     for(i=0; i<s->avctx->thread_count; i++){
788         if(init_duplicate_context(s->thread_context[i], s) < 0)
789            goto fail;
790         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
791         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
792     }
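/* The loop above splits the macroblock rows between the threads with rounding
 * to the nearest boundary.  Worked example: mb_height = 30, thread_count = 4
 * gives start/end rows of (30*i + 2)/4, i.e. the slices [0,8), [8,15), [15,23)
 * and [23,30) -- 8, 7, 8 and 7 rows respectively. */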
793
794     return 0;
795  fail:
796     MPV_common_end(s);
797     return -1;
798 }
799
800 /* free the common structure for both encoder and decoder */
801 void MPV_common_end(MpegEncContext *s)
802 {
803     int i, j, k;
804
805     for(i=0; i<s->avctx->thread_count; i++){
806         free_duplicate_context(s->thread_context[i]);
807     }
808     for(i=1; i<s->avctx->thread_count; i++){
809         av_freep(&s->thread_context[i]);
810     }
811
812     av_freep(&s->parse_context.buffer);
813     s->parse_context.buffer_size=0;
814
815     av_freep(&s->mb_type);
816     av_freep(&s->p_mv_table_base);
817     av_freep(&s->b_forw_mv_table_base);
818     av_freep(&s->b_back_mv_table_base);
819     av_freep(&s->b_bidir_forw_mv_table_base);
820     av_freep(&s->b_bidir_back_mv_table_base);
821     av_freep(&s->b_direct_mv_table_base);
822     s->p_mv_table= NULL;
823     s->b_forw_mv_table= NULL;
824     s->b_back_mv_table= NULL;
825     s->b_bidir_forw_mv_table= NULL;
826     s->b_bidir_back_mv_table= NULL;
827     s->b_direct_mv_table= NULL;
828     for(i=0; i<2; i++){
829         for(j=0; j<2; j++){
830             for(k=0; k<2; k++){
831                 av_freep(&s->b_field_mv_table_base[i][j][k]);
832                 s->b_field_mv_table[i][j][k]=NULL;
833             }
834             av_freep(&s->b_field_select_table[i][j]);
835             av_freep(&s->p_field_mv_table_base[i][j]);
836             s->p_field_mv_table[i][j]=NULL;
837         }
838         av_freep(&s->p_field_select_table[i]);
839     }
840     
841     av_freep(&s->dc_val_base);
842     av_freep(&s->ac_val_base);
843     av_freep(&s->coded_block_base);
844     av_freep(&s->mbintra_table);
845     av_freep(&s->cbp_table);
846     av_freep(&s->pred_dir_table);
847     
848     av_freep(&s->mbskip_table);
849     av_freep(&s->prev_pict_types);
850     av_freep(&s->bitstream_buffer);
851     s->allocated_bitstream_buffer_size=0;
852
853     av_freep(&s->avctx->stats_out);
854     av_freep(&s->ac_stats);
855     av_freep(&s->error_status_table);
856     av_freep(&s->mb_index2xy);
857     av_freep(&s->lambda_table);
858     av_freep(&s->q_intra_matrix);
859     av_freep(&s->q_inter_matrix);
860     av_freep(&s->q_intra_matrix16);
861     av_freep(&s->q_inter_matrix16);
862     av_freep(&s->input_picture);
863     av_freep(&s->reordered_input_picture);
864     av_freep(&s->dct_offset);
865
866     if(s->picture){
867         for(i=0; i<MAX_PICTURE_COUNT; i++){
868             free_picture(s, &s->picture[i]);
869         }
870     }
871     av_freep(&s->picture);
872     s->context_initialized = 0;
873     s->last_picture_ptr=
874     s->next_picture_ptr=
875     s->current_picture_ptr= NULL;
876     s->linesize= s->uvlinesize= 0;
877
878     for(i=0; i<3; i++)
879         av_freep(&s->visualization_buffer[i]);
880
881     avcodec_default_free_buffers(s->avctx);
882 }
883
884 #ifdef CONFIG_ENCODERS
885
886 /* init video encoder */
887 int MPV_encode_init(AVCodecContext *avctx)
888 {
889     MpegEncContext *s = avctx->priv_data;
890     int i, dummy;
891     int chroma_h_shift, chroma_v_shift;
892     
893     MPV_encode_defaults(s);
894
895     avctx->pix_fmt = PIX_FMT_YUV420P; // FIXME
896
897     s->bit_rate = avctx->bit_rate;
898     s->width = avctx->width;
899     s->height = avctx->height;
900     if(avctx->gop_size > 600){
901         av_log(avctx, AV_LOG_ERROR, "Warning, keyframe interval too large! reducing it ...\n");
902         avctx->gop_size=600;
903     }
904     s->gop_size = avctx->gop_size;
905     s->avctx = avctx;
906     s->flags= avctx->flags;
907     s->flags2= avctx->flags2;
908     s->max_b_frames= avctx->max_b_frames;
909     s->codec_id= avctx->codec->id;
910     s->luma_elim_threshold  = avctx->luma_elim_threshold;
911     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
912     s->strict_std_compliance= avctx->strict_std_compliance;
913     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
914     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
915     s->mpeg_quant= avctx->mpeg_quant;
916     s->rtp_mode= !!avctx->rtp_payload_size;
917     s->intra_dc_precision= avctx->intra_dc_precision;
918     s->user_specified_pts = AV_NOPTS_VALUE;
919
920     if (s->gop_size <= 1) {
921         s->intra_only = 1;
922         s->gop_size = 12;
923     } else {
924         s->intra_only = 0;
925     }
926
927     s->me_method = avctx->me_method;
928
929     /* Fixed QSCALE */
930     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
931     
932     s->adaptive_quant= (   s->avctx->lumi_masking
933                         || s->avctx->dark_masking
934                         || s->avctx->temporal_cplx_masking 
935                         || s->avctx->spatial_cplx_masking
936                         || s->avctx->p_masking
937                         || (s->flags&CODEC_FLAG_QP_RD))
938                        && !s->fixed_qscale;
939     
940     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
941     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
942     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
943
944     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
945         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed for encoding with a maximum bitrate\n");
946         return -1;
947     }    
948
949     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
950         av_log(avctx, AV_LOG_INFO, "Warning, min_rate > 0 but min_rate != max_rate isn't recommended!\n");
951     }
952     
953     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
954         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
955         return -1;
956     }
957     
958     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
959         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
960         return -1;
961     }
962         
963     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate 
964        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
965        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
966         
967         av_log(avctx, AV_LOG_INFO, "Warning, vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
968     }
969        
970     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
971        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
972         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
973         return -1;
974     }
975         
976     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
977         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
978         return -1;
979     }
980     
981     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
982         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
983         return -1;
984     }
985     
986     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
987         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
988         return -1;
989     }
990
991     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
992         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
993         return -1;
994     }
995     
996     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
997         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
998         return -1;
999     }
1000
1001     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN)) 
1002        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1003         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1004         return -1;
1005     }
1006         
1007     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1008         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1009         return -1;
1010     }
1011         
1012     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1013         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1014         return -1;
1015     }
1016
1017     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1018         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1019         return -1;
1020     }
1021     
1022     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1023         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection isn't supported yet\n");
1024         return -1;
1025     }
1026     
1027     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4 
1028        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO 
1029        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1030         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1031         return -1;
1032     }
1033     
1034     if(s->avctx->thread_count > 1)
1035         s->rtp_mode= 1;
1036
1037     i= ff_gcd(avctx->frame_rate, avctx->frame_rate_base);
1038     if(i > 1){
1039         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1040         avctx->frame_rate /= i;
1041         avctx->frame_rate_base /= i;
1042 //        return -1;
1043     }
1044     
1045     if(s->codec_id==CODEC_ID_MJPEG){
1046         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1047         s->inter_quant_bias= 0;
1048     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1049         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1050         s->inter_quant_bias= 0;
1051     }else{
1052         s->intra_quant_bias=0;
1053         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1054     }
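/* The biases above are expressed in units of 1/2^QUANT_BIAS_SHIFT of the
 * quantizer step, as the "(a + x/2)/x" style comments indicate:
 * 1<<(QUANT_BIAS_SHIFT-1) adds half a step (round to nearest, MJPEG intra),
 * 3<<(QUANT_BIAS_SHIFT-3) adds 3/8 of a step (MPEG-1/2 style intra), and
 * -(1<<(QUANT_BIAS_SHIFT-2)) subtracts a quarter step, giving the usual inter
 * dead zone.  Roughly:
 *
 *     level ~= (coeff + bias * step / 2^QUANT_BIAS_SHIFT) / step
 */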
1055     
1056     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1057         s->intra_quant_bias= avctx->intra_quant_bias;
1058     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1059         s->inter_quant_bias= avctx->inter_quant_bias;
1060         
1061     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1062
1063     av_reduce(&s->time_increment_resolution, &dummy, s->avctx->frame_rate, s->avctx->frame_rate_base, (1<<16)-1);
1064     s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
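/* Worked example of the two lines above: for NTSC material with
 * frame_rate/frame_rate_base = 30000/1001 (already in lowest terms and below
 * the (1<<16)-1 cap), time_increment_resolution = 30000 and
 * time_increment_bits = av_log2(29999) + 1 = 15, since 2^14 <= 29999 < 2^15.
 * For 25/1 the result would be a resolution of 25 coded in 5 bits. */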
1065
1066     switch(avctx->codec->id) {
1067     case CODEC_ID_MPEG1VIDEO:
1068         s->out_format = FMT_MPEG1;
1069         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1070         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1071         break;
1072     case CODEC_ID_MPEG2VIDEO:
1073         s->out_format = FMT_MPEG1;
1074         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1075         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1076         s->rtp_mode= 1;
1077         break;
1078     case CODEC_ID_LJPEG:
1079     case CODEC_ID_MJPEG:
1080         s->out_format = FMT_MJPEG;
1081         s->intra_only = 1; /* force intra only for jpeg */
1082         s->mjpeg_write_tables = 1; /* write all tables */
1083         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1084         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1085         s->mjpeg_vsample[1] = 1;
1086         s->mjpeg_vsample[2] = 1; 
1087         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1088         s->mjpeg_hsample[1] = 1; 
1089         s->mjpeg_hsample[2] = 1; 
1090         if (mjpeg_init(s) < 0)
1091             return -1;
1092         avctx->delay=0;
1093         s->low_delay=1;
1094         break;
1095 #ifdef CONFIG_RISKY
1096     case CODEC_ID_H261:
1097         s->out_format = FMT_H261;
1098         avctx->delay=0;
1099         s->low_delay=1;
1100         break;
1101     case CODEC_ID_H263:
1102         if (h263_get_picture_format(s->width, s->height) == 7) {
1103             av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
1104             return -1;
1105         }
1106         s->out_format = FMT_H263;
1107         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1108         avctx->delay=0;
1109         s->low_delay=1;
1110         break;
1111     case CODEC_ID_H263P:
1112         s->out_format = FMT_H263;
1113         s->h263_plus = 1;
1114         /* Fx */
1115         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1116         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1117         s->modified_quant= s->h263_aic;
1118         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1119         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1120         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1121         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1122         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1123
1124         /* /Fx */
1125         /* These are just to be sure */
1126         avctx->delay=0;
1127         s->low_delay=1;
1128         break;
1129     case CODEC_ID_FLV1:
1130         s->out_format = FMT_H263;
1131         s->h263_flv = 2; /* format = 1; 11-bit codes */
1132         s->unrestricted_mv = 1;
1133         s->rtp_mode=0; /* don't allow GOB */
1134         avctx->delay=0;
1135         s->low_delay=1;
1136         break;
1137     case CODEC_ID_RV10:
1138         s->out_format = FMT_H263;
1139         avctx->delay=0;
1140         s->low_delay=1;
1141         break;
1142     case CODEC_ID_RV20:
1143         s->out_format = FMT_H263;
1144         avctx->delay=0;
1145         s->low_delay=1;
1146         s->modified_quant=1;
1147         s->h263_aic=1;
1148         s->h263_plus=1;
1149         s->loop_filter=1;
1150         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1151         break;
1152     case CODEC_ID_MPEG4:
1153         s->out_format = FMT_H263;
1154         s->h263_pred = 1;
1155         s->unrestricted_mv = 1;
1156         s->low_delay= s->max_b_frames ? 0 : 1;
1157         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1158         break;
1159     case CODEC_ID_MSMPEG4V1:
1160         s->out_format = FMT_H263;
1161         s->h263_msmpeg4 = 1;
1162         s->h263_pred = 1;
1163         s->unrestricted_mv = 1;
1164         s->msmpeg4_version= 1;
1165         avctx->delay=0;
1166         s->low_delay=1;
1167         break;
1168     case CODEC_ID_MSMPEG4V2:
1169         s->out_format = FMT_H263;
1170         s->h263_msmpeg4 = 1;
1171         s->h263_pred = 1;
1172         s->unrestricted_mv = 1;
1173         s->msmpeg4_version= 2;
1174         avctx->delay=0;
1175         s->low_delay=1;
1176         break;
1177     case CODEC_ID_MSMPEG4V3:
1178         s->out_format = FMT_H263;
1179         s->h263_msmpeg4 = 1;
1180         s->h263_pred = 1;
1181         s->unrestricted_mv = 1;
1182         s->msmpeg4_version= 3;
1183         s->flipflop_rounding=1;
1184         avctx->delay=0;
1185         s->low_delay=1;
1186         break;
1187     case CODEC_ID_WMV1:
1188         s->out_format = FMT_H263;
1189         s->h263_msmpeg4 = 1;
1190         s->h263_pred = 1;
1191         s->unrestricted_mv = 1;
1192         s->msmpeg4_version= 4;
1193         s->flipflop_rounding=1;
1194         avctx->delay=0;
1195         s->low_delay=1;
1196         break;
1197     case CODEC_ID_WMV2:
1198         s->out_format = FMT_H263;
1199         s->h263_msmpeg4 = 1;
1200         s->h263_pred = 1;
1201         s->unrestricted_mv = 1;
1202         s->msmpeg4_version= 5;
1203         s->flipflop_rounding=1;
1204         avctx->delay=0;
1205         s->low_delay=1;
1206         break;
1207 #endif
1208     default:
1209         return -1;
1210     }
1211     
1212     avctx->has_b_frames= !s->low_delay;
1213
1214     s->encoding = 1;
1215
1216     /* init */
1217     if (MPV_common_init(s) < 0)
1218         return -1;
1219
1220     if(s->modified_quant)
1221         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1222     s->progressive_frame= 
1223     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1224     s->quant_precision=5;
1225     
1226     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1227     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1228     
1229 #ifdef CONFIG_ENCODERS
1230 #ifdef CONFIG_RISKY
1231     if (s->out_format == FMT_H261)
1232         ff_h261_encode_init(s);
1233     if (s->out_format == FMT_H263)
1234         h263_encode_init(s);
1235     if(s->msmpeg4_version)
1236         ff_msmpeg4_encode_init(s);
1237 #endif
1238     if (s->out_format == FMT_MPEG1)
1239         ff_mpeg1_encode_init(s);
1240 #endif
1241
1242     /* init q matrix */
1243     for(i=0;i<64;i++) {
1244         int j= s->dsp.idct_permutation[i];
1245 #ifdef CONFIG_RISKY
1246         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1247             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1248             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1249         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1250             s->intra_matrix[j] =
1251             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1252         }else
1253 #endif
1254         { /* mpeg1/2 */
1255             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1256             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1257         }
1258         if(s->avctx->intra_matrix)
1259             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1260         if(s->avctx->inter_matrix)
1261             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1262     }
1263
1264     /* precompute matrix */
1265     /* for mjpeg, we do include qscale in the matrix */
1266     if (s->out_format != FMT_MJPEG) {
1267         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
1268                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1269         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16, 
1270                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1271     }
1272
1273     if(ff_rate_control_init(s) < 0)
1274         return -1;
1275     
1276     return 0;
1277 }
1278
1279 int MPV_encode_end(AVCodecContext *avctx)
1280 {
1281     MpegEncContext *s = avctx->priv_data;
1282
1283 #ifdef STATS
1284     print_stats();
1285 #endif
1286
1287     ff_rate_control_uninit(s);
1288
1289     MPV_common_end(s);
1290     if (s->out_format == FMT_MJPEG)
1291         mjpeg_close(s);
1292
1293     av_freep(&avctx->extradata);
1294       
1295     return 0;
1296 }
1297
1298 #endif //CONFIG_ENCODERS
1299
1300 void init_rl(RLTable *rl, int use_static)
1301 {
1302     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1303     uint8_t index_run[MAX_RUN+1];
1304     int last, run, level, start, end, i;
1305
1306     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1307     if(use_static && rl->max_level[0])
1308         return;
1309
1310     /* compute max_level[], max_run[] and index_run[] */
1311     for(last=0;last<2;last++) {
1312         if (last == 0) {
1313             start = 0;
1314             end = rl->last;
1315         } else {
1316             start = rl->last;
1317             end = rl->n;
1318         }
1319
1320         memset(max_level, 0, MAX_RUN + 1);
1321         memset(max_run, 0, MAX_LEVEL + 1);
1322         memset(index_run, rl->n, MAX_RUN + 1);
1323         for(i=start;i<end;i++) {
1324             run = rl->table_run[i];
1325             level = rl->table_level[i];
1326             if (index_run[run] == rl->n)
1327                 index_run[run] = i;
1328             if (level > max_level[run])
1329                 max_level[run] = level;
1330             if (run > max_run[level])
1331                 max_run[level] = run;
1332         }
1333         if(use_static)
1334             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1335         else
1336             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1337         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1338         if(use_static)
1339             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1340         else
1341             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1342         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1343         if(use_static)
1344             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1345         else
1346             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1347         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1348     }
1349 }
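/* After init_rl() the three tables answer the usual run/level queries.  An
 * encoder-side lookup might go like this (sketch; 'run', 'level', 'last' and
 * 'code' are hypothetical locals):
 *
 *     int idx  = rl->index_run[last][run];   // first entry with this run, or rl->n if none
 *     int lmax = rl->max_level[last][run];   // largest level coded with this run
 *     if (idx >= rl->n || level > lmax)
 *         code = rl->n;                      // no VLC entry: escape coding needed
 *     else
 *         code = idx + level - 1;            // VLC index of (last, run, level)
 */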
1350
1351 /* draw the edges of width 'w' of an image of size width, height */
1352 //FIXME check that this is ok for mpeg4 interlaced
1353 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1354 {
1355     uint8_t *ptr, *last_line;
1356     int i;
1357
1358     last_line = buf + (height - 1) * wrap;
1359     for(i=0;i<w;i++) {
1360         /* top and bottom */
1361         memcpy(buf - (i + 1) * wrap, buf, width);
1362         memcpy(last_line + (i + 1) * wrap, last_line, width);
1363     }
1364     /* left and right */
1365     ptr = buf;
1366     for(i=0;i<height;i++) {
1367         memset(ptr - w, ptr[0], w);
1368         memset(ptr + width, ptr[width-1], w);
1369         ptr += wrap;
1370     }
1371     /* corners */
1372     for(i=0;i<w;i++) {
1373         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1374         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1375         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
1376         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
1377     }
1378 }
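/* Illustration of draw_edges_c() for width = 4, height = 2, w = 2 and pixels
 *
 *     A B C D
 *     E F G H
 *
 * after the three passes (top/bottom rows, left/right columns, corners) the
 * padded buffer around the image reads
 *
 *     A A  A B C D  D D
 *     A A  A B C D  D D
 *     A A [A B C D] D D
 *     E E [E F G H] H H
 *     E E  E F G H  H H
 *     E E  E F G H  H H
 *
 * i.e. every border pixel is replicated w pixels outwards in all directions.
 */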
1379
1380 int ff_find_unused_picture(MpegEncContext *s, int shared){
1381     int i;
1382     
1383     if(shared){
1384         for(i=0; i<MAX_PICTURE_COUNT; i++){
1385             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1386         }
1387     }else{
1388         for(i=0; i<MAX_PICTURE_COUNT; i++){
1389             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1390         }
1391         for(i=0; i<MAX_PICTURE_COUNT; i++){
1392             if(s->picture[i].data[0]==NULL) return i;
1393         }
1394     }
1395
1396     assert(0);
1397     return -1;
1398 }
1399
1400 static void update_noise_reduction(MpegEncContext *s){
1401     int intra, i;
1402
1403     for(intra=0; intra<2; intra++){
1404         if(s->dct_count[intra] > (1<<16)){
1405             for(i=0; i<64; i++){
1406                 s->dct_error_sum[intra][i] >>=1;
1407             }
1408             s->dct_count[intra] >>= 1;
1409         }
1410         
1411         for(i=0; i<64; i++){
1412             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1413         }
1414     }
1415 }
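/* The offset formula above is essentially
 *
 *     dct_offset ~= noise_reduction * dct_count / dct_error_sum
 *
 * so coefficients whose accumulated error energy is large get a smaller
 * threshold.  Worked example (illustrative numbers): noise_reduction = 256,
 * dct_count = 1000 and dct_error_sum[i] = 50000 give
 * (256*1000 + 25000) / 50001 = 5.  The halving of both counters above keeps
 * this a running average rather than an ever-growing sum.
 */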
1416
1417 /**
1418  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1419  */
1420 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1421 {
1422     int i;
1423     AVFrame *pic;
1424     s->mb_skiped = 0;
1425
1426     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1427
1428     /* mark&release old frames */
1429     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1430         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1431
1432         /* release forgotten pictures */
1433         /* if(mpeg124/h263) */
1434         if(!s->encoding){
1435             for(i=0; i<MAX_PICTURE_COUNT; i++){
1436                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1437                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1438                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
1439                 }
1440             }
1441         }
1442     }
1443 alloc:
1444     if(!s->encoding){
1445         /* release non reference frames */
1446         for(i=0; i<MAX_PICTURE_COUNT; i++){
1447             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1448                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1449             }
1450         }
1451
1452         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1453             pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
1454         else{
1455             i= ff_find_unused_picture(s, 0);
1456             pic= (AVFrame*)&s->picture[i];
1457         }
1458
1459         pic->reference= s->pict_type != B_TYPE && !s->dropable ? 3 : 0;
1460
1461         pic->coded_picture_number= s->coded_picture_number++;
1462         
1463         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1464             return -1;
1465
1466         s->current_picture_ptr= (Picture*)pic;
1467         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1468         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1469     }
1470
1471     s->current_picture_ptr->pict_type= s->pict_type;
1472 //    if(s->flags && CODEC_FLAG_QSCALE) 
1473   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1474     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1475
1476     copy_picture(&s->current_picture, s->current_picture_ptr);
1477   
1478   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1479     if (s->pict_type != B_TYPE) {
1480         s->last_picture_ptr= s->next_picture_ptr;
1481         if(!s->dropable)
1482             s->next_picture_ptr= s->current_picture_ptr;
1483     }
1484 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1485         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL, 
1486         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL, 
1487         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1488         s->pict_type, s->dropable);*/
1489     
1490     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1491     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1492     
1493     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1494         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1495         assert(s->pict_type != B_TYPE); //these should have been dropped if we dont have a reference
1496         goto alloc;
1497     }
1498
1499     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1500
1501     if(s->picture_structure!=PICT_FRAME){
1502         int i;
1503         for(i=0; i<4; i++){
1504             if(s->picture_structure == PICT_BOTTOM_FIELD){
1505                  s->current_picture.data[i] += s->current_picture.linesize[i];
1506             } 
1507             s->current_picture.linesize[i] *= 2;
1508             s->last_picture.linesize[i] *=2;
1509             s->next_picture.linesize[i] *=2;
1510         }
1511     }
1512   }
1513    
1514     s->hurry_up= s->avctx->hurry_up;
1515     s->error_resilience= avctx->error_resilience;
1516
1517     /* set dequantizer, we can't do it during init as it might change for mpeg4
1518        and we can't do it in the header decode as init isn't called for mpeg4 there yet */
1519     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1520         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1521         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1522     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1523         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1524         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1525     }else{
1526         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1527         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1528     }
1529
1530     if(s->dct_error_sum){
1531         assert(s->avctx->noise_reduction && s->encoding);
1532
1533         update_noise_reduction(s);
1534     }
1535         
1536 #ifdef HAVE_XVMC
1537     if(s->avctx->xvmc_acceleration)
1538         return XVMC_field_start(s, avctx);
1539 #endif
1540     return 0;
1541 }
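/* Illustrative note (not part of the original source): a decoder or encoder
 * built on this context typically brackets each frame with this pair of
 * calls, roughly (hypothetical caller, details omitted):
 *
 *     if (MPV_frame_start(s, avctx) < 0)
 *         return -1;
 *     ... decode or encode all macroblocks of the frame ...
 *     MPV_frame_end(s);
 *
 * MPV_frame_start picks/allocates current_picture, rotates the last/next
 * picture pointers for non-B frames and selects the dequantizer;
 * MPV_frame_end (below) draws the edge padding and releases buffers that are
 * no longer referenced.
 */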
1542
1543 /* generic function for encode/decode called after a frame has been coded/decoded */
1544 void MPV_frame_end(MpegEncContext *s)
1545 {
1546     int i;
1547     /* draw edge for correct motion prediction if outside */
1548 #ifdef HAVE_XVMC
1549 //just to make sure that all data is rendered.
1550     if(s->avctx->xvmc_acceleration){
1551         XVMC_field_end(s);
1552     }else
1553 #endif
1554     if(s->unrestricted_mv && s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1555             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1556             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1557             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1558     }
1559     emms_c();
1560     
1561     s->last_pict_type    = s->pict_type;
1562     if(s->pict_type!=B_TYPE){
1563         s->last_non_b_pict_type= s->pict_type;
1564     }
1565 #if 0
1566         /* copy back current_picture variables */
1567     for(i=0; i<MAX_PICTURE_COUNT; i++){
1568         if(s->picture[i].data[0] == s->current_picture.data[0]){
1569             s->picture[i]= s->current_picture;
1570             break;
1571         }    
1572     }
1573     assert(i<MAX_PICTURE_COUNT);
1574 #endif    
1575
1576     if(s->encoding){
1577         /* release non reference frames */
1578         for(i=0; i<MAX_PICTURE_COUNT; i++){
1579             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1580                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1581             }
1582         }
1583     }
1584     // clear copies, to avoid confusion
1585 #if 0
1586     memset(&s->last_picture, 0, sizeof(Picture));
1587     memset(&s->next_picture, 0, sizeof(Picture));
1588     memset(&s->current_picture, 0, sizeof(Picture));
1589 #endif
1590     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1591 }
1592
1593 /**
1594  * draws a line from (ex, ey) -> (sx, sy).
1595  * @param w width of the image
1596  * @param h height of the image
1597  * @param stride stride/linesize of the image
1598  * @param color color of the line
1599  */
1600 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1601     int t, x, y, fr, f;
1602     
1603     sx= clip(sx, 0, w-1);
1604     sy= clip(sy, 0, h-1);
1605     ex= clip(ex, 0, w-1);
1606     ey= clip(ey, 0, h-1);
1607     
1608     buf[sy*stride + sx]+= color;
1609     
1610     if(ABS(ex - sx) > ABS(ey - sy)){
1611         if(sx > ex){
1612             t=sx; sx=ex; ex=t;
1613             t=sy; sy=ey; ey=t;
1614         }
1615         buf+= sx + sy*stride;
1616         ex-= sx;
1617         f= ((ey-sy)<<16)/ex;
1618         for(x= 0; x <= ex; x++){
1619             y = (x*f)>>16;
1620             fr= (x*f)&0xFFFF;
1621             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1622             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1623         }
1624     }else{
1625         if(sy > ey){
1626             t=sx; sx=ex; ex=t;
1627             t=sy; sy=ey; ey=t;
1628         }
1629         buf+= sx + sy*stride;
1630         ey-= sy;
1631         if(ey) f= ((ex-sx)<<16)/ey;
1632         else   f= 0;
1633         for(y= 0; y <= ey; y++){
1634             x = (y*f)>>16;
1635             fr= (y*f)&0xFFFF;
1636             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1637             buf[y*stride + x+1]+= (color*         fr )>>16;
1638         }
1639     }
1640 }
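/* Illustrative note (not part of the original source): draw_line is a 16.16
 * fixed point DDA with a simple two-pixel anti-aliasing split. Worked
 * example, assuming sx=0, sy=0, ex=10, ey=3:
 *     f = (3<<16)/10 = 19660                  (slope in 16.16 fixed point)
 *     at x=4: 4*f = 78640, y = 78640>>16 = 1, fr = 78640&0xFFFF = 13104
 * so the pixel in row y receives about 80% of 'color' and the pixel in row
 * y+1 the remaining 20%.
 */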
1641
1642 /**
1643  * draws an arrow from (ex, ey) -> (sx, sy).
1644  * @param w width of the image
1645  * @param h height of the image
1646  * @param stride stride/linesize of the image
1647  * @param color color of the arrow
1648  */
1649 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ 
1650     int dx,dy;
1651
1652     sx= clip(sx, -100, w+100);
1653     sy= clip(sy, -100, h+100);
1654     ex= clip(ex, -100, w+100);
1655     ey= clip(ey, -100, h+100);
1656     
1657     dx= ex - sx;
1658     dy= ey - sy;
1659     
1660     if(dx*dx + dy*dy > 3*3){
1661         int rx=  dx + dy;
1662         int ry= -dx + dy;
1663         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1664         
1665         //FIXME subpixel accuracy
1666         rx= ROUNDED_DIV(rx*3<<4, length);
1667         ry= ROUNDED_DIV(ry*3<<4, length);
1668         
1669         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1670         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1671     }
1672     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1673 }
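/* Illustrative note (not part of the original source): the two extra
 * draw_line() calls above form the arrow head at (sx, sy), and are only drawn
 * when the vector is longer than 3 pixels. (rx, ry) = (dx+dy, -dx+dy) is the
 * direction rotated by 45 degrees (the second call uses (-ry, rx), a further
 * 90-degree rotation for the other side of the head); dividing by length,
 * which is ~16*|(rx,ry)| because of the <<8 inside ff_sqrt, scales each head
 * stroke to roughly 3 pixels regardless of the vector's magnitude.
 */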
1674
1675 /**
1676  * prints debugging info for the given picture.
1677  */
1678 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1679
1680     if(!pict || !pict->mb_type) return;
1681
1682     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1683         int x,y;
1684         
1685         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1686         switch (pict->pict_type) {
1687             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1688             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1689             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1690             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1691             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1692             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;            
1693         }
1694         for(y=0; y<s->mb_height; y++){
1695             for(x=0; x<s->mb_width; x++){
1696                 if(s->avctx->debug&FF_DEBUG_SKIP){
1697                     int count= s->mbskip_table[x + y*s->mb_stride];
1698                     if(count>9) count=9;
1699                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1700                 }
1701                 if(s->avctx->debug&FF_DEBUG_QP){
1702                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1703                 }
1704                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1705                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1706                     //Type & MV direction
1707                     if(IS_PCM(mb_type))
1708                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1709                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1710                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1711                     else if(IS_INTRA4x4(mb_type))
1712                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1713                     else if(IS_INTRA16x16(mb_type))
1714                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1715                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1716                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1717                     else if(IS_DIRECT(mb_type))
1718                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1719                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1720                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1721                     else if(IS_GMC(mb_type))
1722                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1723                     else if(IS_SKIP(mb_type))
1724                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1725                     else if(!USES_LIST(mb_type, 1))
1726                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1727                     else if(!USES_LIST(mb_type, 0))
1728                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1729                     else{
1730                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1731                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1732                     }
1733                     
1734                     //segmentation
1735                     if(IS_8X8(mb_type))
1736                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1737                     else if(IS_16X8(mb_type))
1738                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1739                     else if(IS_8X16(mb_type))
1740                         av_log(s->avctx, AV_LOG_DEBUG, "¦");
1741                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1742                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1743                     else
1744                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1745                     
1746                         
1747                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1748                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1749                     else
1750                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1751                 }
1752 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1753             }
1754             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1755         }
1756     }
1757
1758     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1759         const int shift= 1 + s->quarter_sample;
1760         int mb_y;
1761         uint8_t *ptr;
1762         int i;
1763         int h_chroma_shift, v_chroma_shift;
1764         const int width = s->avctx->width;
1765         const int height= s->avctx->height;
1766         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1767         const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
1768         s->low_delay=0; //needed to see the vectors without trashing the buffers
1769
1770         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1771         for(i=0; i<3; i++){
1772             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1773             pict->data[i]= s->visualization_buffer[i];
1774         }
1775         pict->type= FF_BUFFER_TYPE_COPY;
1776         ptr= pict->data[0];
1777
1778         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1779             int mb_x;
1780             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1781                 const int mb_index= mb_x + mb_y*s->mb_stride;
1782                 if((s->avctx->debug_mv) && pict->motion_val){
1783                   int type;
1784                   for(type=0; type<3; type++){
1785                     int direction = 0;
1786                     switch (type) {
1787                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1788                                 continue;
1789                               direction = 0;
1790                               break;
1791                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1792                                 continue;
1793                               direction = 0;
1794                               break;
1795                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1796                                 continue;
1797                               direction = 1;
1798                               break;
1799                     }
1800                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1801                         continue;
1802
1803                     if(IS_8X8(pict->mb_type[mb_index])){
1804                       int i;
1805                       for(i=0; i<4; i++){
1806                         int sx= mb_x*16 + 4 + 8*(i&1);
1807                         int sy= mb_y*16 + 4 + 8*(i>>1);
1808                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << mv_sample_log2-1;
1809                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1810                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1811                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1812                       }
1813                     }else if(IS_16X8(pict->mb_type[mb_index])){
1814                       int i;
1815                       for(i=0; i<2; i++){
1816                         int sx=mb_x*16 + 8;
1817                         int sy=mb_y*16 + 4 + 8*i;
1818                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << mv_sample_log2-1;
1819                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1820                         int my=(pict->motion_val[direction][xy][1]>>shift);
1821                         
1822                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1823                             my*=2;
1824                         
1825                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1826                       }
1827                     }else if(IS_8X16(pict->mb_type[mb_index])){
1828                       int i;
1829                       for(i=0; i<2; i++){
1830                         int sx=mb_x*16 + 4 + 8*i;
1831                         int sy=mb_y*16 + 8;
1832                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << mv_sample_log2-1;
1833                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1834                         int my=(pict->motion_val[direction][xy][1]>>shift);
1835                         
1836                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1837                             my*=2;
1838                         
1839                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1840                       }
1841                     }else{
1842                       int sx= mb_x*16 + 8;
1843                       int sy= mb_y*16 + 8;
1844                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1845                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1846                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1847                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1848                     }
1849                   }                  
1850                 }
1851                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1852                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1853                     int y;
1854                     for(y=0; y<8; y++){
1855                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1856                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1857                     }
1858                 }
1859                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1860                     int mb_type= pict->mb_type[mb_index];
1861                     uint64_t u,v;
1862                     int y;
1863 #define COLOR(theta, r)\
1864 u= (int)(128 + r*cos(theta*3.141592/180));\
1865 v= (int)(128 + r*sin(theta*3.141592/180));
1866
1867                     
1868                     u=v=128;
1869                     if(IS_PCM(mb_type)){
1870                         COLOR(120,48)
1871                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1872                         COLOR(30,48)
1873                     }else if(IS_INTRA4x4(mb_type)){
1874                         COLOR(90,48)
1875                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1876 //                        COLOR(120,48)
1877                     }else if(IS_DIRECT(mb_type)){
1878                         COLOR(150,48)
1879                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1880                         COLOR(170,48)
1881                     }else if(IS_GMC(mb_type)){
1882                         COLOR(190,48)
1883                     }else if(IS_SKIP(mb_type)){
1884 //                        COLOR(180,48)
1885                     }else if(!USES_LIST(mb_type, 1)){
1886                         COLOR(240,48)
1887                     }else if(!USES_LIST(mb_type, 0)){
1888                         COLOR(0,48)
1889                     }else{
1890                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1891                         COLOR(300,48)
1892                     }
1893
1894                     u*= 0x0101010101010101ULL;
1895                     v*= 0x0101010101010101ULL;
1896                     for(y=0; y<8; y++){
1897                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1898                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1899                     }
1900
1901                     //segmentation
1902                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1903                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1904                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1905                     }
1906                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1907                         for(y=0; y<16; y++)
1908                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1909                     }
1910                         
1911                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1912                         // hmm
1913                     }
1914                 }
1915                 s->mbskip_table[mb_index]=0;
1916             }
1917         }
1918     }
1919 }
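/* Illustrative note (not part of the original source): when FF_DEBUG_MB_TYPE
 * is set, the text dump above prints a few characters per macroblock: first
 * the prediction type ('P' PCM, 'A'/'i'/'I' intra variants, 'd'/'D' direct,
 * 'g'/'G' GMC, 'S' skip, '>'/'<'/'X' forward/backward/bi-predicted), then the
 * partitioning ('+' 8x8, '-' 16x8, '¦' 8x16, space for 16x16/intra), then '='
 * for interlaced H.264 macroblocks. The visualization branch in the second
 * half of the function draws motion vectors and MB type colors into private
 * copies of the planes (s->visualization_buffer), so the decoder's reference
 * frames are left untouched.
 */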
1920
1921 #ifdef CONFIG_ENCODERS
1922
1923 static int get_sae(uint8_t *src, int ref, int stride){
1924     int x,y;
1925     int acc=0;
1926     
1927     for(y=0; y<16; y++){
1928         for(x=0; x<16; x++){
1929             acc+= ABS(src[x+y*stride] - ref);
1930         }
1931     }
1932     
1933     return acc;
1934 }
1935
1936 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1937     int x, y, w, h;
1938     int acc=0;
1939     
1940     w= s->width &~15;
1941     h= s->height&~15;
1942     
1943     for(y=0; y<h; y+=16){
1944         for(x=0; x<w; x+=16){
1945             int offset= x + y*stride;
1946             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1947             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1948             int sae = get_sae(src + offset, mean, stride);
1949             
1950             acc+= sae + 500 < sad;
1951         }
1952     }
1953     return acc;
1954 }
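/* Illustrative note (not part of the original source): the heuristic above
 * counts a 16x16 block as "intra looking" when its sum of absolute
 * differences against its own mean (SAE, i.e. how flat the block is) plus a
 * fixed bias of 500 is still smaller than the SAD against the reference frame
 * passed in (the previous input picture in select_input_picture()). A large
 * count therefore signals poor temporal prediction; b_frame_strategy==1 uses
 * it, via b_frame_score, to decide where to stop inserting B-frames.
 */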
1955
1956
1957 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
1958     AVFrame *pic=NULL;
1959     int i;
1960     const int encoding_delay= s->max_b_frames;
1961     int direct=1;
1962     
1963     if(pic_arg){
1964         if(pic_arg->pts != AV_NOPTS_VALUE){ 
1965             if(s->user_specified_pts != AV_NOPTS_VALUE){
1966                 int64_t time= av_rescale(pic_arg->pts, s->avctx->frame_rate, s->avctx->frame_rate_base*(int64_t)AV_TIME_BASE);
1967                 int64_t last= av_rescale(s->user_specified_pts, s->avctx->frame_rate, s->avctx->frame_rate_base*(int64_t)AV_TIME_BASE);
1968             
1969                 if(time <= last){            
1970                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%Ld, last=%Ld\n", pic_arg->pts, s->user_specified_pts);
1971                     return -1;
1972                 }
1973             }
1974             s->user_specified_pts= pic_arg->pts;
1975         }else{
1976             if(s->user_specified_pts != AV_NOPTS_VALUE){
1977                 s->user_specified_pts= 
1978                 pic_arg->pts= s->user_specified_pts + AV_TIME_BASE*(int64_t)s->avctx->frame_rate_base / s->avctx->frame_rate;
1979                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%Ld)\n", pic_arg->pts);
1980             }else{
1981                 pic_arg->pts= av_rescale(pic_arg->display_picture_number*(int64_t)s->avctx->frame_rate_base, AV_TIME_BASE, s->avctx->frame_rate);
1982             }
1983         }
1984     }
1985
1986   if(pic_arg){
1987     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
1988     if(pic_arg->linesize[0] != s->linesize) direct=0;
1989     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
1990     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
1991   
1992 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
1993     
1994     if(direct){
1995         i= ff_find_unused_picture(s, 1);
1996
1997         pic= (AVFrame*)&s->picture[i];
1998         pic->reference= 3;
1999     
2000         for(i=0; i<4; i++){
2001             pic->data[i]= pic_arg->data[i];
2002             pic->linesize[i]= pic_arg->linesize[i];
2003         }
2004         alloc_picture(s, (Picture*)pic, 1);
2005     }else{
2006         int offset= 16;
2007         i= ff_find_unused_picture(s, 0);
2008
2009         pic= (AVFrame*)&s->picture[i];
2010         pic->reference= 3;
2011
2012         alloc_picture(s, (Picture*)pic, 0);
2013
2014         if(   pic->data[0] + offset == pic_arg->data[0] 
2015            && pic->data[1] + offset == pic_arg->data[1]
2016            && pic->data[2] + offset == pic_arg->data[2]){
2017        // empty
2018         }else{
2019             int h_chroma_shift, v_chroma_shift;
2020             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2021         
2022             for(i=0; i<3; i++){
2023                 int src_stride= pic_arg->linesize[i];
2024                 int dst_stride= i ? s->uvlinesize : s->linesize;
2025                 int h_shift= i ? h_chroma_shift : 0;
2026                 int v_shift= i ? v_chroma_shift : 0;
2027                 int w= s->width >>h_shift;
2028                 int h= s->height>>v_shift;
2029                 uint8_t *src= pic_arg->data[i];
2030                 uint8_t *dst= pic->data[i] + offset;
2031             
2032                 if(src_stride==dst_stride)
2033                     memcpy(dst, src, src_stride*h);
2034                 else{
2035                     while(h--){
2036                         memcpy(dst, src, w);
2037                         dst += dst_stride;
2038                         src += src_stride;
2039                     }
2040                 }
2041             }
2042         }
2043     }
2044     copy_picture_attributes(s, pic, pic_arg);
2045     
2046     pic->display_picture_number= s->input_picture_number++;
2047  
2048   }
2049   
2050     /* shift buffer entries */
2051     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2052         s->input_picture[i-1]= s->input_picture[i];
2053         
2054     s->input_picture[encoding_delay]= (Picture*)pic;
2055
2056     return 0;
2057 }
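/* Illustrative note (not part of the original source): the pts handling at
 * the top guesses missing timestamps from the frame rate. Worked example,
 * assuming AV_TIME_BASE=1000000 and 25 fps (frame_rate=25,
 * frame_rate_base=1): each guessed pts advances by 1000000*1/25 = 40000,
 * i.e. 40 ms per frame. The "direct" path further down takes over the
 * caller's plane pointers without copying when there is no encoding delay and
 * the linesizes already match; otherwise the planes are copied row by row
 * into an internally allocated picture, offset by 16 bytes per plane.
 */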
2058
2059 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2060     int x, y, plane;
2061     int score=0;
2062     int64_t score64=0;
2063     int64_t threshold;
2064
2065     for(plane=0; plane<3; plane++){
2066         const int stride= p->linesize[plane];
2067         const int bw= plane ? 1 : 2;
2068         for(y=0; y<s->mb_height*bw; y++){
2069             for(x=0; x<s->mb_width*bw; x++){
2070                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride), ref->data[plane] + 8*(x + y*stride), stride, 8);
2071                 
2072                 switch(s->avctx->frame_skip_exp){
2073                     case 0: score= FFMAX(score, v); break;
2074                     case 1: score+= ABS(v);break;
2075                     case 2: score+= v*v;break;
2076                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2077                     case 4: score64+= v*v*(int64_t)(v*v);break;
2078                 }
2079             }
2080         }
2081     }
2082     
2083     if(score) score64= score;
2084
2085     if(score64 < s->avctx->frame_skip_threshold)
2086         return 1;
2087     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2088         return 1;
2089     return 0;
2090 }
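/* Illustrative note (not part of the original source): skip_check() compares
 * the candidate frame against the reference 8x8 samples at a time and
 * accumulates the per-block metric v according to frame_skip_exp:
 *     0: max(v)   1: sum |v|   2: sum v^2   3: sum |v|^3   4: sum v^4
 * The frame is dropped (treated as a perfect repeat) when the total stays
 * below the absolute frame_skip_threshold or below
 * frame_skip_factor*lambda/256.
 */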
2091
2092 static void select_input_picture(MpegEncContext *s){
2093     int i;
2094
2095     for(i=1; i<MAX_PICTURE_COUNT; i++)
2096         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2097     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2098
2099     /* set next picture types & ordering */
2100     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2101         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2102             s->reordered_input_picture[0]= s->input_picture[0];
2103             s->reordered_input_picture[0]->pict_type= I_TYPE;
2104             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2105         }else{
2106             int b_frames;
2107
2108             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2109                 if(skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2110 //av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2111                 
2112                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2113                         for(i=0; i<4; i++)
2114                             s->input_picture[0]->data[i]= NULL;
2115                         s->input_picture[0]->type= 0;            
2116                     }else{
2117                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER 
2118                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2119             
2120                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2121                     }
2122
2123                     goto no_output_pic;
2124                 }
2125             }
2126
2127             if(s->flags&CODEC_FLAG_PASS2){
2128                 for(i=0; i<s->max_b_frames+1; i++){
2129                     int pict_num= s->input_picture[0]->display_picture_number + i;
2130
2131                     if(pict_num >= s->rc_context.num_entries) 
2132                         break;
2133                     if(!s->input_picture[i]){
2134                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2135                         break;
2136                     }
2137
2138                     s->input_picture[i]->pict_type= 
2139                         s->rc_context.entry[pict_num].new_pict_type;
2140                 }
2141             }
2142
2143             if(s->avctx->b_frame_strategy==0){
2144                 b_frames= s->max_b_frames;
2145                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2146             }else if(s->avctx->b_frame_strategy==1){
2147                 for(i=1; i<s->max_b_frames+1; i++){
2148                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2149                         s->input_picture[i]->b_frame_score= 
2150                             get_intra_count(s, s->input_picture[i  ]->data[0], 
2151                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2152                     }
2153                 }
2154                 for(i=0; i<s->max_b_frames; i++){
2155                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2156                 }
2157                                 
2158                 b_frames= FFMAX(0, i-1);
2159                 
2160                 /* reset scores */
2161                 for(i=0; i<b_frames+1; i++){
2162                     s->input_picture[i]->b_frame_score=0;
2163                 }
2164             }else{
2165                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2166                 b_frames=0;
2167             }
2168
2169             emms_c();
2170 //static int b_count=0;
2171 //b_count+= b_frames;
2172 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2173
2174             for(i= b_frames - 1; i>=0; i--){
2175                 int type= s->input_picture[i]->pict_type;
2176                 if(type && type != B_TYPE)
2177                     b_frames= i;
2178             }
2179             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2180                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many bframes in a row\n");
2181             }
2182
2183             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2184               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2185                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2186               }else{
2187                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2188                     b_frames=0;
2189                 s->input_picture[b_frames]->pict_type= I_TYPE;
2190               }
2191             }
2192             
2193             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2194                && b_frames
2195                && s->input_picture[b_frames]->pict_type== I_TYPE)
2196                 b_frames--;
2197
2198             s->reordered_input_picture[0]= s->input_picture[b_frames];
2199             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2200                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2201             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2202             for(i=0; i<b_frames; i++){
2203                 s->reordered_input_picture[i+1]= s->input_picture[i];
2204                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2205                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2206             }
2207         }
2208     }
2209 no_output_pic:
2210     if(s->reordered_input_picture[0]){
2211         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2212
2213         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2214
2215         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2216             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
2217         
2218             int i= ff_find_unused_picture(s, 0);
2219             Picture *pic= &s->picture[i];
2220
2221             /* mark us unused / free shared pic */
2222             for(i=0; i<4; i++)
2223                 s->reordered_input_picture[0]->data[i]= NULL;
2224             s->reordered_input_picture[0]->type= 0;
2225             
2226             pic->reference              = s->reordered_input_picture[0]->reference;
2227             
2228             alloc_picture(s, pic, 0);
2229
2230             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2231
2232             s->current_picture_ptr= pic;
2233         }else{
2234             // input is not a shared pix -> reuse buffer for current_pix
2235
2236             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
2237                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2238             
2239             s->current_picture_ptr= s->reordered_input_picture[0];
2240             for(i=0; i<4; i++){
2241                 s->new_picture.data[i]+=16;
2242             }
2243         }
2244         copy_picture(&s->current_picture, s->current_picture_ptr);
2245     
2246         s->picture_number= s->new_picture.display_picture_number;
2247 //printf("dpn:%d\n", s->picture_number);
2248     }else{
2249        memset(&s->new_picture, 0, sizeof(Picture));
2250     }
2251 }
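/* Illustrative note (not part of the original source): a reordering example,
 * assuming max_b_frames=2 and frames 1 2 3 arriving in display order with 3
 * chosen as the P frame: b_frames=2 and the function emits
 * reordered_input_picture[] = {P3, B1, B2}, so the reference is coded before
 * the B-frames that depend on it while display order stays 1 2 3. For a
 * FF_BUFFER_TYPE_SHARED input a separate internal Picture is allocated as the
 * reconstruction buffer (the shared data must not be written to); otherwise
 * the input picture itself becomes current_picture and new_picture.data is
 * advanced by the 16-byte offset used in load_input_picture().
 */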
2252
2253 int MPV_encode_picture(AVCodecContext *avctx,
2254                        unsigned char *buf, int buf_size, void *data)
2255 {
2256     MpegEncContext *s = avctx->priv_data;
2257     AVFrame *pic_arg = data;
2258     int i, stuffing_count;
2259
2260     if(avctx->pix_fmt != PIX_FMT_YUV420P){
2261         av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
2262         return -1;
2263     }
2264     
2265     for(i=0; i<avctx->thread_count; i++){
2266         int start_y= s->thread_context[i]->start_mb_y;
2267         int   end_y= s->thread_context[i]->  end_mb_y;
2268         int h= s->mb_height;
2269         uint8_t *start= buf + buf_size*start_y/h;
2270         uint8_t *end  = buf + buf_size*  end_y/h;
2271
2272         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2273     }
2274
2275     s->picture_in_gop_number++;
2276
2277     if(load_input_picture(s, pic_arg) < 0)
2278         return -1;
2279     
2280     select_input_picture(s);
2281     
2282     /* output? */
2283     if(s->new_picture.data[0]){
2284         s->pict_type= s->new_picture.pict_type;
2285 //emms_c();
2286 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2287         MPV_frame_start(s, avctx);
2288
2289         encode_picture(s, s->picture_number);
2290         
2291         avctx->real_pict_num  = s->picture_number;
2292         avctx->header_bits = s->header_bits;
2293         avctx->mv_bits     = s->mv_bits;
2294         avctx->misc_bits   = s->misc_bits;
2295         avctx->i_tex_bits  = s->i_tex_bits;
2296         avctx->p_tex_bits  = s->p_tex_bits;
2297         avctx->i_count     = s->i_count;
2298         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2299         avctx->skip_count  = s->skip_count;
2300
2301         MPV_frame_end(s);
2302
2303         if (s->out_format == FMT_MJPEG)
2304             mjpeg_picture_trailer(s);
2305         
2306         if(s->flags&CODEC_FLAG_PASS1)
2307             ff_write_pass1_stats(s);
2308
2309         for(i=0; i<4; i++){
2310             avctx->error[i] += s->current_picture_ptr->error[i];
2311         }
2312
2313         flush_put_bits(&s->pb);
2314         s->frame_bits  = put_bits_count(&s->pb);
2315
2316         stuffing_count= ff_vbv_update(s, s->frame_bits);
2317         if(stuffing_count){
2318             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2319                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2320                 return -1;
2321             }
2322
2323             switch(s->codec_id){
2324             case CODEC_ID_MPEG1VIDEO:
2325             case CODEC_ID_MPEG2VIDEO:
2326                 while(stuffing_count--){
2327                     put_bits(&s->pb, 8, 0);
2328                 }
2329             break;
2330             case CODEC_ID_MPEG4:
2331                 put_bits(&s->pb, 16, 0);
2332                 put_bits(&s->pb, 16, 0x1C3);
2333                 stuffing_count -= 4;
2334                 while(stuffing_count--){
2335                     put_bits(&s->pb, 8, 0xFF);
2336                 }
2337             break;
2338             default:
2339                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2340             }
2341             flush_put_bits(&s->pb);
2342             s->frame_bits  = put_bits_count(&s->pb);
2343         }
2344
2345         /* update mpeg1/2 vbv_delay for CBR */    
2346         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2347            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2348             int vbv_delay;
2349
2350             assert(s->repeat_first_field==0);
2351             
2352             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2353             assert(vbv_delay < 0xFFFF);
2354
2355             s->vbv_delay_ptr[0] &= 0xF8;
2356             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2357             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2358             s->vbv_delay_ptr[2] &= 0x07;
2359             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2360         }
2361         s->total_bits += s->frame_bits;
2362         avctx->frame_bits  = s->frame_bits;
2363     }else{
2364         assert((pbBufPtr(&s->pb) == s->pb.buf));
2365         s->frame_bits=0;
2366     }
2367     assert((s->frame_bits&7)==0);
2368     
2369     return s->frame_bits/8;
2370 }
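/* Illustrative note (not part of the original source): the CBR branch above
 * patches the 16-bit vbv_delay field of the already written picture header in
 * place. The field straddles three bytes, split 3+8+5:
 *     byte0: xxxxxddd  (top 3 bits, vbv_delay>>13)
 *     byte1: dddddddd  (middle 8 bits, vbv_delay>>5)
 *     byte2: dddddxxx  (low 5 bits, vbv_delay<<3)
 * which is what the &0xF8 / &0x07 masking preserves. Rate-control stuffing is
 * codec specific: MPEG-1/2 append zero bytes, MPEG-4 writes the four-byte
 * code 0x000001C3 followed by 0xFF filler bytes.
 */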
2371
2372 #endif //CONFIG_ENCODERS
2373
2374 static inline void gmc1_motion(MpegEncContext *s,
2375                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2376                                uint8_t **ref_picture)
2377 {
2378     uint8_t *ptr;
2379     int offset, src_x, src_y, linesize, uvlinesize;
2380     int motion_x, motion_y;
2381     int emu=0;
2382
2383     motion_x= s->sprite_offset[0][0];
2384     motion_y= s->sprite_offset[0][1];
2385     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2386     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2387     motion_x<<=(3-s->sprite_warping_accuracy);
2388     motion_y<<=(3-s->sprite_warping_accuracy);
2389     src_x = clip(src_x, -16, s->width);
2390     if (src_x == s->width)
2391         motion_x =0;
2392     src_y = clip(src_y, -16, s->height);
2393     if (src_y == s->height)
2394         motion_y =0;
2395
2396     linesize = s->linesize;
2397     uvlinesize = s->uvlinesize;
2398     
2399     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2400
2401     if(s->flags&CODEC_FLAG_EMU_EDGE){
2402         if(   (unsigned)src_x >= s->h_edge_pos - 17
2403            || (unsigned)src_y >= s->v_edge_pos - 17){
2404             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2405             ptr= s->edge_emu_buffer;
2406         }
2407     }
2408     
2409     if((motion_x|motion_y)&7){
2410         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2411         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2412     }else{
2413         int dxy;
2414         
2415         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2416         if (s->no_rounding){
2417             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2418         }else{
2419             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2420         }
2421     }
2422     
2423     if(s->flags&CODEC_FLAG_GRAY) return;
2424
2425     motion_x= s->sprite_offset[1][0];
2426     motion_y= s->sprite_offset[1][1];
2427     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2428     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2429     motion_x<<=(3-s->sprite_warping_accuracy);
2430     motion_y<<=(3-s->sprite_warping_accuracy);
2431     src_x = clip(src_x, -8, s->width>>1);
2432     if (src_x == s->width>>1)
2433         motion_x =0;
2434     src_y = clip(src_y, -8, s->height>>1);
2435     if (src_y == s->height>>1)
2436         motion_y =0;
2437
2438     offset = (src_y * uvlinesize) + src_x;
2439     ptr = ref_picture[1] + offset;
2440     if(s->flags&CODEC_FLAG_EMU_EDGE){
2441         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2442            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2443             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2444             ptr= s->edge_emu_buffer;
2445             emu=1;
2446         }
2447     }
2448     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2449     
2450     ptr = ref_picture[2] + offset;
2451     if(emu){
2452         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2453         ptr= s->edge_emu_buffer;
2454     }
2455     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2456     
2457     return;
2458 }
2459
2460 static inline void gmc_motion(MpegEncContext *s,
2461                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2462                                uint8_t **ref_picture)
2463 {
2464     uint8_t *ptr;
2465     int linesize, uvlinesize;
2466     const int a= s->sprite_warping_accuracy;
2467     int ox, oy;
2468
2469     linesize = s->linesize;
2470     uvlinesize = s->uvlinesize;
2471
2472     ptr = ref_picture[0];
2473
2474     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2475     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2476
2477     s->dsp.gmc(dest_y, ptr, linesize, 16,
2478            ox, 
2479            oy, 
2480            s->sprite_delta[0][0], s->sprite_delta[0][1],
2481            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2482            a+1, (1<<(2*a+1)) - s->no_rounding,
2483            s->h_edge_pos, s->v_edge_pos);
2484     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2485            ox + s->sprite_delta[0][0]*8, 
2486            oy + s->sprite_delta[1][0]*8, 
2487            s->sprite_delta[0][0], s->sprite_delta[0][1],
2488            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2489            a+1, (1<<(2*a+1)) - s->no_rounding,
2490            s->h_edge_pos, s->v_edge_pos);
2491
2492     if(s->flags&CODEC_FLAG_GRAY) return;
2493
2494     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2495     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2496
2497     ptr = ref_picture[1];
2498     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2499            ox, 
2500            oy, 
2501            s->sprite_delta[0][0], s->sprite_delta[0][1],
2502            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2503            a+1, (1<<(2*a+1)) - s->no_rounding,
2504            s->h_edge_pos>>1, s->v_edge_pos>>1);
2505     
2506     ptr = ref_picture[2];
2507     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2508            ox, 
2509            oy, 
2510            s->sprite_delta[0][0], s->sprite_delta[0][1],
2511            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2512            a+1, (1<<(2*a+1)) - s->no_rounding,
2513            s->h_edge_pos>>1, s->v_edge_pos>>1);
2514 }
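/* Illustrative note (not part of the original source): gmc_motion() applies
 * the affine global motion model. ox/oy are the warped source coordinates of
 * the macroblock's top-left corner,
 *     ox = sprite_offset[0][0] + dxx*16*mb_x + dxy*16*mb_y
 * (and analogously for oy), in a fixed-point format tied to
 * sprite_warping_accuracy 'a' (hence the a+1 shift and (1<<(2a+1)) rounding
 * constant passed to dsp.gmc, which walks the block adding the deltas per
 * pixel). The 16x16 luma block is warped as two 8-pixel-wide halves, which is
 * why the second call offsets ox/oy by sprite_delta*8. gmc1_motion() above is
 * the cheaper single-warp-point case, which reduces to a translation.
 */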
2515
2516 /**
2517  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2518  * @param buf destination buffer
2519  * @param src source buffer
2520  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2521  * @param block_w width of block
2522  * @param block_h height of block
2523  * @param src_x x coordinate of the top left sample of the block in the source buffer
2524  * @param src_y y coordinate of the top left sample of the block in the source buffer
2525  * @param w width of the source buffer
2526  * @param h height of the source buffer
2527  */
2528 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 
2529                                     int src_x, int src_y, int w, int h){
2530     int x, y;
2531     int start_y, start_x, end_y, end_x;
2532
2533     if(src_y>= h){
2534         src+= (h-1-src_y)*linesize;
2535         src_y=h-1;
2536     }else if(src_y<=-block_h){
2537         src+= (1-block_h-src_y)*linesize;
2538         src_y=1-block_h;
2539     }
2540     if(src_x>= w){
2541         src+= (w-1-src_x);
2542         src_x=w-1;
2543     }else if(src_x<=-block_w){
2544         src+= (1-block_w-src_x);
2545         src_x=1-block_w;
2546     }
2547
2548     start_y= FFMAX(0, -src_y);
2549     start_x= FFMAX(0, -src_x);
2550     end_y= FFMIN(block_h, h-src_y);
2551     end_x= FFMIN(block_w, w-src_x);
2552
2553     // copy existing part
2554     for(y=start_y; y<end_y; y++){
2555         for(x=start_x; x<end_x; x++){
2556             buf[x + y*linesize]= src[x + y*linesize];
2557         }
2558     }
2559
2560     //top
2561     for(y=0; y<start_y; y++){
2562         for(x=start_x; x<end_x; x++){
2563             buf[x + y*linesize]= buf[x + start_y*linesize];
2564         }
2565     }
2566
2567     //bottom
2568     for(y=end_y; y<block_h; y++){
2569         for(x=start_x; x<end_x; x++){
2570             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2571         }
2572     }
2573                                     
2574     for(y=0; y<block_h; y++){
2575        //left
2576         for(x=0; x<start_x; x++){
2577             buf[x + y*linesize]= buf[start_x + y*linesize];
2578         }
2579        
2580        //right
2581         for(x=end_x; x<block_w; x++){
2582             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2583         }
2584     }
2585 }
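/* Illustrative note (not part of the original source): a worked example of
 * the clipping above -- for a 16x16 block with w=h=64, src_x=60, src_y=-3:
 *     start_x=0, end_x=FFMIN(16, 64-60)=4  -> columns 4..15 replicate column 3
 *     start_y=FFMAX(0, 3)=3, end_y=16      -> rows 0..2 replicate row 3
 * i.e. the part of the block that lies inside the picture is copied verbatim
 * and everything outside is filled with the nearest edge sample, the same
 * padding draw_edges() pre-computes when CODEC_FLAG_EMU_EDGE is not used.
 */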
2586
2587 static inline int hpel_motion(MpegEncContext *s, 
2588                                   uint8_t *dest, uint8_t *src,
2589                                   int field_based, int field_select,
2590                                   int src_x, int src_y,
2591                                   int width, int height, int stride,
2592                                   int h_edge_pos, int v_edge_pos,
2593                                   int w, int h, op_pixels_func *pix_op,
2594                                   int motion_x, int motion_y)
2595 {
2596     int dxy;
2597     int emu=0;
2598
2599     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2600     src_x += motion_x >> 1;
2601     src_y += motion_y >> 1;
2602                 
2603     /* WARNING: do not forget half pels */
2604     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2605     if (src_x == width)
2606         dxy &= ~1;
2607     src_y = clip(src_y, -16, height);
2608     if (src_y == height)
2609         dxy &= ~2;
2610     src += src_y * stride + src_x;
2611
2612     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2613         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2614            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2615             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2616                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2617             src= s->edge_emu_buffer;
2618             emu=1;
2619         }
2620     }
2621     if(field_select)
2622         src += s->linesize;
2623     pix_op[dxy](dest, src, stride, h);
2624     return emu;
2625 }
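/* Illustrative note (not part of the original source): the 2-bit dxy code
 * above selects one of four half-pel interpolators in pix_op[]:
 *     0 = full-pel copy, 1 = horizontal average, 2 = vertical average,
 *     3 = average over both directions.
 * Worked example, motion_x=5, motion_y=-3: the integer parts move the source
 * pointer by (+2, -2) pixels/lines and dxy = ((-3&1)<<1)|(5&1) = 3, so the
 * diagonal half-pel filter is used.
 */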
2626
2627 static inline int hpel_motion_lowres(MpegEncContext *s, 
2628                                   uint8_t *dest, uint8_t *src,
2629                                   int field_based, int field_select,
2630                                   int src_x, int src_y,
2631                                   int width, int height, int stride,
2632                                   int h_edge_pos, int v_edge_pos,
2633                                   int w, int h, h264_chroma_mc_func *pix_op,
2634                                   int motion_x, int motion_y)
2635 {
2636     const int lowres= s->avctx->lowres;
2637     const int s_mask= (2<<lowres)-1;
2638     int emu=0;
2639     int sx, sy;
2640
2641     if(s->quarter_sample){
2642         motion_x/=2;
2643         motion_y/=2;
2644     }
2645
2646     sx= motion_x & s_mask;
2647     sy= motion_y & s_mask;
2648     src_x += motion_x >> (lowres+1);
2649     src_y += motion_y >> (lowres+1);
2650                 
2651     src += src_y * stride + src_x;
2652
2653     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2654        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2655         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2656                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2657         src= s->edge_emu_buffer;
2658         emu=1;
2659     }
2660
2661     sx <<= 2 - lowres;
2662     sy <<= 2 - lowres;
2663     if(field_select)
2664         src += s->linesize;
2665     pix_op[lowres](dest, src, stride, h, sx, sy);
2666     return emu;
2667 }
2668
2669 /* apply one mpeg motion vector to the three components */
2670 static always_inline void mpeg_motion(MpegEncContext *s,
2671                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2672                                int field_based, int bottom_field, int field_select,
2673                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2674                                int motion_x, int motion_y, int h)
2675 {
2676     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2677     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2678     
2679 #if 0    
2680 if(s->quarter_sample)
2681 {
2682     motion_x>>=1;
2683     motion_y>>=1;
2684 }
2685 #endif
2686
2687     v_edge_pos = s->v_edge_pos >> field_based;
2688     linesize   = s->current_picture.linesize[0] << field_based;
2689     uvlinesize = s->current_picture.linesize[1] << field_based;
2690
2691     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2692     src_x = s->mb_x* 16               + (motion_x >> 1);
2693     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2694
2695     if (s->out_format == FMT_H263) {
2696         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2697             mx = (motion_x>>1)|(motion_x&1);
2698             my = motion_y >>1;
2699             uvdxy = ((my & 1) << 1) | (mx & 1);
2700             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2701             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2702         }else{
2703             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2704             uvsrc_x = src_x>>1;
2705             uvsrc_y = src_y>>1;
2706         }
2707     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
2708         mx = motion_x / 4;
2709         my = motion_y / 4;
2710         uvdxy = 0;
2711         uvsrc_x = s->mb_x*8 + mx;
2712         uvsrc_y = s->mb_y*8 + my;
2713     } else {
2714         if(s->chroma_y_shift){
2715             mx = motion_x / 2;
2716             my = motion_y / 2;
2717             uvdxy = ((my & 1) << 1) | (mx & 1);
2718             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2719             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2720         } else {
2721             if(s->chroma_x_shift){
2722             //Chroma422
2723                 mx = motion_x / 2;
2724                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
2725                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
2726                 uvsrc_y = src_y;
2727             } else {
2728             //Chroma444
2729                 uvdxy = dxy;
2730                 uvsrc_x = src_x;
2731                 uvsrc_y = src_y;
2732             }
2733         }
2734     }
2735
2736     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2737     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2738     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2739
2740     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
2741        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
2742             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
2743                s->codec_id == CODEC_ID_MPEG1VIDEO){
2744                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
2745                 return ;
2746             }
2747             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2748                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2749             ptr_y = s->edge_emu_buffer;
2750             if(!(s->flags&CODEC_FLAG_GRAY)){
2751                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2752                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
2753                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2754                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
2755                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2756                 ptr_cb= uvbuf;
2757                 ptr_cr= uvbuf+16;
2758             }
2759     }
2760
2761     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2762         dest_y += s->linesize;
2763         dest_cb+= s->uvlinesize;
2764         dest_cr+= s->uvlinesize;
2765     }
2766
2767     if(field_select){
2768         ptr_y += s->linesize;
2769         ptr_cb+= s->uvlinesize;
2770         ptr_cr+= s->uvlinesize;
2771     }
2772
2773     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
2774     
2775     if(!(s->flags&CODEC_FLAG_GRAY)){
2776         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
2777         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
2778     }
2779     if(s->out_format == FMT_H261){
2780         ff_h261_loop_filter(s);
2781     }
2782 }
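/*
 * Note on the sub-pel indexing used above: dxy packs the half-pel fractions of
 * the motion vector (bit 0 = horizontal, bit 1 = vertical), so it selects one
 * of the four entries of a pix_op[.][4] row (copy, hpel-x, hpel-y, hpel-xy);
 * uvdxy does the same for the chroma vector after it has been scaled down.
 * Minimal sketch of that packing, for illustration only (not compiled):
 */
#if 0
static int hpel_index_example(int motion_x, int motion_y)
{
    /* bit 0: horizontal half-pel flag, bit 1: vertical half-pel flag */
    return ((motion_y & 1) << 1) | (motion_x & 1);  /* index into pix_op[.][0..3] */
}
#endif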
2783
2784 /* apply one mpeg motion vector to the three components */
2785 static always_inline void mpeg_motion_lowres(MpegEncContext *s,
2786                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2787                                int field_based, int bottom_field, int field_select,
2788                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
2789                                int motion_x, int motion_y, int h)
2790 {
2791     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2792     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
2793     const int lowres= s->avctx->lowres;
2794     const int block_s= 8>>lowres;
2795     const int s_mask= (2<<lowres)-1;
2796     const int h_edge_pos = s->h_edge_pos >> lowres;
2797     const int v_edge_pos = s->v_edge_pos >> lowres;
2798     linesize   = s->current_picture.linesize[0] << field_based;
2799     uvlinesize = s->current_picture.linesize[1] << field_based;
2800
2801     if(s->quarter_sample){ //FIXME obviously not perfect but qpel won't work in lowres anyway
2802         motion_x/=2;
2803         motion_y/=2;
2804     }
2805     
2806     if(field_based){
2807         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
2808     }
2809
2810     sx= motion_x & s_mask;
2811     sy= motion_y & s_mask;
2812     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
2813     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
2814     
2815     if (s->out_format == FMT_H263) {
2816         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
2817         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
2818         uvsrc_x = src_x>>1;
2819         uvsrc_y = src_y>>1;
2820     }else if(s->out_format == FMT_H261){//even the chroma MVs are full-pel in H.261
2821         mx = motion_x / 4;
2822         my = motion_y / 4;
2823         uvsx = (2*mx) & s_mask;
2824         uvsy = (2*my) & s_mask;
2825         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
2826         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
2827     } else {
2828         mx = motion_x / 2;
2829         my = motion_y / 2;
2830         uvsx = mx & s_mask;
2831         uvsy = my & s_mask;
2832         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
2833         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
2834     }
2835
2836     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2837     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2838     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2839
2840     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
2841        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2842             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2843                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2844             ptr_y = s->edge_emu_buffer;
2845             if(!(s->flags&CODEC_FLAG_GRAY)){
2846                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2847                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
2848                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
2849                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
2850                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
2851                 ptr_cb= uvbuf;
2852                 ptr_cr= uvbuf+16;
2853             }
2854     }
2855
2856     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2857         dest_y += s->linesize;
2858         dest_cb+= s->uvlinesize;
2859         dest_cr+= s->uvlinesize;
2860     }
2861
2862     if(field_select){
2863         ptr_y += s->linesize;
2864         ptr_cb+= s->uvlinesize;
2865         ptr_cr+= s->uvlinesize;
2866     }
2867
2868     sx <<= 2 - lowres;
2869     sy <<= 2 - lowres;
2870     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
2871     
2872     if(!(s->flags&CODEC_FLAG_GRAY)){
2873         uvsx <<= 2 - lowres;
2874         uvsy <<= 2 - lowres;
2875         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
2876         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
2877     }
2878     //FIXME h261 lowres loop filter
2879 }
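/*
 * In the lowres path every coordinate is scaled down by 2^lowres: a macroblock
 * is 2*block_s = 16>>lowres luma pixels wide and the motion vector keeps
 * lowres+1 fractional bits (s_mask).  The "<< (2 - lowres)" at the end rescales
 * that fraction to the 3-bit sub-pel interface of the h264_chroma_mc_func used
 * as pix_op.  Rough sketch for lowres = 1 (illustration only, not compiled):
 */
#if 0
static int lowres_fraction_example(int motion_x)
{
    const int lowres = 1;                  /* assumed lowres level             */
    const int s_mask = (2 << lowres) - 1;  /* 3: keep lowres+1 fractional bits */
    int sx = motion_x & s_mask;            /* sub-pel fraction at this scale   */
    return sx << (2 - lowres);             /* rescaled for the chroma MC op    */
}
#endif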
2880
2881 //FIXME move to dsputil, avg variant, 16x16 version
2882 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
2883     int x;
2884     uint8_t * const top   = src[1];
2885     uint8_t * const left  = src[2];
2886     uint8_t * const mid   = src[0];
2887     uint8_t * const right = src[3];
2888     uint8_t * const bottom= src[4];
2889 #define OBMC_FILTER(x, t, l, m, r, b)\
2890     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
2891 #define OBMC_FILTER4(x, t, l, m, r, b)\
2892     OBMC_FILTER(x         , t, l, m, r, b);\
2893     OBMC_FILTER(x+1       , t, l, m, r, b);\
2894     OBMC_FILTER(x  +stride, t, l, m, r, b);\
2895     OBMC_FILTER(x+1+stride, t, l, m, r, b);
2896     
2897     x=0;
2898     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
2899     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
2900     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
2901     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
2902     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
2903     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
2904     x+= stride;
2905     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
2906     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
2907     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
2908     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
2909     x+= stride;
2910     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
2911     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
2912     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
2913     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
2914     x+= 2*stride;
2915     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
2916     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
2917     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
2918     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
2919     x+= 2*stride;
2920     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
2921     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
2922     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
2923     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
2924     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
2925     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
2926     x+= stride;
2927     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
2928     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
2929     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
2930     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
2931 }
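/*
 * Every OBMC_FILTER() call above uses weights that sum to 8, so "+4 >> 3" is a
 * rounded division by the total weight: each output pixel is a weighted average
 * of the same position predicted with the mid/top/left/right/bottom block
 * motion vectors.  One such blend, spelled out (illustration only, not compiled):
 */
#if 0
static uint8_t obmc_blend_example(uint8_t top, uint8_t left, uint8_t mid,
                                  uint8_t right, uint8_t bottom)
{
    /* weights 2,2,4,0,0 as used for the top-left corner pixel above */
    return (2*top + 2*left + 4*mid + 0*right + 0*bottom + 4) >> 3;
}
#endif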
2932
2933 /* obmc for 1 8x8 luma block */
2934 static inline void obmc_motion(MpegEncContext *s,
2935                                uint8_t *dest, uint8_t *src,
2936                                int src_x, int src_y,
2937                                op_pixels_func *pix_op,
2938                                int16_t mv[5][2]/* mid top left right bottom*/)
2939 #define MID    0
2940 {
2941     int i;
2942     uint8_t *ptr[5];
2943     
2944     assert(s->quarter_sample==0);
2945     
2946     for(i=0; i<5; i++){
2947         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
2948             ptr[i]= ptr[MID];
2949         }else{
2950             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
2951             hpel_motion(s, ptr[i], src, 0, 0,
2952                         src_x, src_y,
2953                         s->width, s->height, s->linesize,
2954                         s->h_edge_pos, s->v_edge_pos,
2955                         8, 8, pix_op,
2956                         mv[i][0], mv[i][1]);
2957         }
2958     }
2959
2960     put_obmc(dest, ptr, s->linesize);                
2961 }
2962
2963 static inline void qpel_motion(MpegEncContext *s,
2964                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2965                                int field_based, int bottom_field, int field_select,
2966                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2967                                qpel_mc_func (*qpix_op)[16],
2968                                int motion_x, int motion_y, int h)
2969 {
2970     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2971     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
2972
2973     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2974     src_x = s->mb_x *  16                 + (motion_x >> 2);
2975     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
2976
2977     v_edge_pos = s->v_edge_pos >> field_based;
2978     linesize = s->linesize << field_based;
2979     uvlinesize = s->uvlinesize << field_based;
2980     
2981     if(field_based){
2982         mx= motion_x/2;
2983         my= motion_y>>1;
2984     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
2985         static const int rtab[8]= {0,0,1,1,0,0,0,1};
2986         mx= (motion_x>>1) + rtab[motion_x&7];
2987         my= (motion_y>>1) + rtab[motion_y&7];
2988     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
2989         mx= (motion_x>>1)|(motion_x&1);
2990         my= (motion_y>>1)|(motion_y&1);
2991     }else{
2992         mx= motion_x/2;
2993         my= motion_y/2;
2994     }
2995     mx= (mx>>1)|(mx&1);
2996     my= (my>>1)|(my&1);
2997
2998     uvdxy= (mx&1) | ((my&1)<<1);
2999     mx>>=1;
3000     my>>=1;
3001
3002     uvsrc_x = s->mb_x *  8                 + mx;
3003     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3004
3005     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3006     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3007     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3008
3009     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16 
3010        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3011         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based, 
3012                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3013         ptr_y= s->edge_emu_buffer;
3014         if(!(s->flags&CODEC_FLAG_GRAY)){
3015             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3016             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based, 
3017                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3018             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based, 
3019                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3020             ptr_cb= uvbuf;
3021             ptr_cr= uvbuf + 16;
3022         }
3023     }
3024
3025     if(!field_based)
3026         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3027     else{
3028         if(bottom_field){
3029             dest_y += s->linesize;
3030             dest_cb+= s->uvlinesize;
3031             dest_cr+= s->uvlinesize;
3032         }
3033
3034         if(field_select){
3035             ptr_y  += s->linesize;
3036             ptr_cb += s->uvlinesize;
3037             ptr_cr += s->uvlinesize;
3038         }
3039         //damn interlaced mode
3040         //FIXME boundary mirroring is not exactly correct here
3041         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3042         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3043     }
3044     if(!(s->flags&CODEC_FLAG_GRAY)){
3045         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3046         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3047     }
3048 }
3049
3050 inline int ff_h263_round_chroma(int x){
3051     if (x >= 0)
3052         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3053     else {
3054         x = -x;
3055         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3056     }
3057 }
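/*
 * For 4MV macroblocks the chroma planes get a single motion vector: the four
 * half-pel luma vectors are summed and ff_h263_round_chroma() divides that sum
 * by 8 (average of 4 vectors, then 2:1 chroma subsampling) using the
 * table-driven rounding referred to in chroma_4mv_motion() below.  Sketch of
 * the call pattern with made-up vector values (illustration only, not compiled):
 */
#if 0
static int chroma_4mv_example(void)
{
    int luma_mv_x[4] = {2, 3, 3, 2};                 /* half-pel luma MVs     */
    int sum = luma_mv_x[0] + luma_mv_x[1] + luma_mv_x[2] + luma_mv_x[3];
    return ff_h263_round_chroma(sum);                /* half-pel chroma MV    */
}
#endif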
3058
3059 /**
3060  * H.263 chroma 4MV motion compensation.
3061  */
3062 static inline void chroma_4mv_motion(MpegEncContext *s,
3063                                      uint8_t *dest_cb, uint8_t *dest_cr,
3064                                      uint8_t **ref_picture,
3065                                      op_pixels_func *pix_op,
3066                                      int mx, int my){
3067     int dxy, emu=0, src_x, src_y, offset;
3068     uint8_t *ptr;
3069     
3070     /* In case of 8X8, we construct a single chroma motion vector
3071        with a special rounding */
3072     mx= ff_h263_round_chroma(mx);
3073     my= ff_h263_round_chroma(my);
3074     
3075     dxy = ((my & 1) << 1) | (mx & 1);
3076     mx >>= 1;
3077     my >>= 1;
3078
3079     src_x = s->mb_x * 8 + mx;
3080     src_y = s->mb_y * 8 + my;
3081     src_x = clip(src_x, -8, s->width/2);
3082     if (src_x == s->width/2)
3083         dxy &= ~1;
3084     src_y = clip(src_y, -8, s->height/2);
3085     if (src_y == s->height/2)
3086         dxy &= ~2;
3087     
3088     offset = (src_y * (s->uvlinesize)) + src_x;
3089     ptr = ref_picture[1] + offset;
3090     if(s->flags&CODEC_FLAG_EMU_EDGE){
3091         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3092            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3093             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3094             ptr= s->edge_emu_buffer;
3095             emu=1;
3096         }
3097     }
3098     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3099
3100     ptr = ref_picture[2] + offset;
3101     if(emu){
3102         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3103         ptr= s->edge_emu_buffer;
3104     }
3105     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3106 }
3107
3108 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3109                                      uint8_t *dest_cb, uint8_t *dest_cr,
3110                                      uint8_t **ref_picture,
3111                                      h264_chroma_mc_func *pix_op,
3112                                      int mx, int my){
3113     const int lowres= s->avctx->lowres;
3114     const int block_s= 8>>lowres;
3115     const int s_mask= (2<<lowres)-1;
3116     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3117     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3118     int emu=0, src_x, src_y, offset, sx, sy;
3119     uint8_t *ptr;
3120     
3121     if(s->quarter_sample){
3122         mx/=2;
3123         my/=2;
3124     }
3125
3126     /* In case of 8X8, we construct a single chroma motion vector
3127        with a special rounding */
3128     mx= ff_h263_round_chroma(mx);
3129     my= ff_h263_round_chroma(my);
3130     
3131     sx= mx & s_mask;
3132     sy= my & s_mask;
3133     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3134     src_y = s->mb_y*block_s + (my >> (lowres+1));
3135     
3136     offset = src_y * s->uvlinesize + src_x;
3137     ptr = ref_picture[1] + offset;
3138     if(s->flags&CODEC_FLAG_EMU_EDGE){
3139         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3140            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3141             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3142             ptr= s->edge_emu_buffer;
3143             emu=1;
3144         }
3145     }     
3146     sx <<= 2 - lowres;
3147     sy <<= 2 - lowres;
3148     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3149           
3150     ptr = ref_picture[2] + offset;
3151     if(emu){
3152         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3153         ptr= s->edge_emu_buffer;
3154     }
3155     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3156 }
3157
3158 /**
3159  * motion compensation of a single macroblock
3160  * @param s context
3161  * @param dest_y luma destination pointer
3162  * @param dest_cb chroma cb/u destination pointer
3163  * @param dest_cr chroma cr/v destination pointer
3164  * @param dir direction (0->forward, 1->backward)
3165  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3166  * @param pix_op halfpel motion compensation function (average or put normally)
3167  * @param qpix_op qpel motion compensation function (average or put normally)
3168  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3169  */
3170 static inline void MPV_motion(MpegEncContext *s, 
3171                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3172                               int dir, uint8_t **ref_picture, 
3173                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3174 {
3175     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3176     int mb_x, mb_y, i;
3177     uint8_t *ptr, *dest;
3178
3179     mb_x = s->mb_x;
3180     mb_y = s->mb_y;
3181
3182     if(s->obmc && s->pict_type != B_TYPE){
3183         int16_t mv_cache[4][4][2];
3184         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3185         const int mot_stride= s->b8_stride;
3186         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3187
3188         assert(!s->mb_skiped);
3189                 
3190         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3191         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3192         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3193
3194         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3195             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3196         }else{
3197             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3198         }
3199
3200         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3201             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3202             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3203         }else{
3204             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3205             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3206         }
3207
3208         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3209             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3210             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3211         }else{
3212             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3213             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3214         }
3215         
3216         mx = 0;
3217         my = 0;
3218         for(i=0;i<4;i++) {
3219             const int x= (i&1)+1;
3220             const int y= (i>>1)+1;
3221             int16_t mv[5][2]= {
3222                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3223                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3224                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3225                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3226                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3227             //FIXME cleanup
3228             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3229                         ref_picture[0],
3230                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3231                         pix_op[1],
3232                         mv);
3233
3234             mx += mv[0][0];
3235             my += mv[0][1];
3236         }
3237         if(!(s->flags&CODEC_FLAG_GRAY))
3238             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3239
3240         return;
3241     }
3242    
3243     switch(s->mv_type) {
3244     case MV_TYPE_16X16:
3245 #ifdef CONFIG_RISKY
3246         if(s->mcsel){
3247             if(s->real_sprite_warping_points==1){
3248                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3249                             ref_picture);
3250             }else{
3251                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3252                             ref_picture);
3253             }
3254         }else if(s->quarter_sample){
3255             qpel_motion(s, dest_y, dest_cb, dest_cr, 
3256                         0, 0, 0,
3257                         ref_picture, pix_op, qpix_op,
3258                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3259         }else if(s->mspel){
3260             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3261                         ref_picture, pix_op,
3262                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3263         }else
3264 #endif
3265         {
3266             mpeg_motion(s, dest_y, dest_cb, dest_cr, 
3267                         0, 0, 0,
3268                         ref_picture, pix_op,
3269                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3270         }           
3271         break;
3272     case MV_TYPE_8X8:
3273         mx = 0;
3274         my = 0;
3275         if(s->quarter_sample){
3276             for(i=0;i<4;i++) {
3277                 motion_x = s->mv[dir][i][0];
3278                 motion_y = s->mv[dir][i][1];
3279
3280                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3281                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3282                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3283                     
3284                 /* WARNING: do not forget half pels */
3285                 src_x = clip(src_x, -16, s->width);
3286                 if (src_x == s->width)
3287                     dxy &= ~3;
3288                 src_y = clip(src_y, -16, s->height);
3289                 if (src_y == s->height)
3290                     dxy &= ~12;
3291                     
3292                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3293                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3294                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8 
3295                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3296                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3297                         ptr= s->edge_emu_buffer;
3298                     }
3299                 }
3300                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3301                 qpix_op[1][dxy](dest, ptr, s->linesize);
3302
3303                 mx += s->mv[dir][i][0]/2;
3304                 my += s->mv[dir][i][1]/2;
3305             }
3306         }else{
3307             for(i=0;i<4;i++) {
3308                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3309                             ref_picture[0], 0, 0,
3310                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3311                             s->width, s->height, s->linesize,
3312                             s->h_edge_pos, s->v_edge_pos,
3313                             8, 8, pix_op[1],
3314                             s->mv[dir][i][0], s->mv[dir][i][1]);
3315
3316                 mx += s->mv[dir][i][0];
3317                 my += s->mv[dir][i][1];
3318             }
3319         }
3320
3321         if(!(s->flags&CODEC_FLAG_GRAY))
3322             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3323         break;
3324     case MV_TYPE_FIELD:
3325         if (s->picture_structure == PICT_FRAME) {
3326             if(s->quarter_sample){
3327                 for(i=0; i<2; i++){
3328                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3329                                 1, i, s->field_select[dir][i],
3330                                 ref_picture, pix_op, qpix_op,
3331                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3332                 }
3333             }else{
3334                 /* top field */       
3335                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3336                             1, 0, s->field_select[dir][0],
3337                             ref_picture, pix_op,
3338                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3339                 /* bottom field */
3340                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3341                             1, 1, s->field_select[dir][1],
3342                             ref_picture, pix_op,
3343                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3344             }
3345         } else {
3346             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3347                 ref_picture= s->current_picture_ptr->data;
3348             } 
3349
3350             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3351                         0, 0, s->field_select[dir][0],
3352                         ref_picture, pix_op,
3353                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3354         }
3355         break;
3356     case MV_TYPE_16X8:
3357         for(i=0; i<2; i++){
3358             uint8_t ** ref2picture;
3359
3360             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3361                 ref2picture= ref_picture;
3362             }else{
3363                 ref2picture= s->current_picture_ptr->data;
3364             } 
3365
3366             mpeg_motion(s, dest_y, dest_cb, dest_cr, 
3367                         0, 0, s->field_select[dir][i],
3368                         ref2picture, pix_op,
3369                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3370                 
3371             dest_y += 16*s->linesize;
3372             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3373             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3374         }        
3375         break;
3376     case MV_TYPE_DMV:
3377         if(s->picture_structure == PICT_FRAME){
3378             for(i=0; i<2; i++){
3379                 int j;
3380                 for(j=0; j<2; j++){
3381                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3382                                 1, j, j^i,
3383                                 ref_picture, pix_op,
3384                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3385                 }
3386                 pix_op = s->dsp.avg_pixels_tab; 
3387             }
3388         }else{
3389             for(i=0; i<2; i++){
3390                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 
3391                             0, 0, s->picture_structure != i+1,
3392                             ref_picture, pix_op,
3393                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3394
3395                 // after put we make avg of the same block
3396                 pix_op=s->dsp.avg_pixels_tab; 
3397
3398                 //the opposite parity is always in the same frame if this is the second field
3399                 if(!s->first_field){
3400                     ref_picture = s->current_picture_ptr->data;    
3401                 }
3402             }
3403         }
3404     break;
3405     default: assert(0);
3406     }
3407 }
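/*
 * The MV_TYPE_DMV branch above builds its prediction in two passes over the
 * same destination: the first mpeg_motion() call writes ("put") the prediction
 * for one field parity, then pix_op is switched to s->dsp.avg_pixels_tab so the
 * second call averages in the opposite-parity prediction.  Per pixel this is
 * roughly the following (illustration only, not compiled):
 */
#if 0
static void put_then_avg_example(uint8_t *dest, const uint8_t *pred_a,
                                 const uint8_t *pred_b, int n)
{
    int i;
    for (i = 0; i < n; i++)
        dest[i] = (pred_a[i] + pred_b[i] + 1) >> 1;  /* put, then rounded avg */
}
#endif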
3408
3409 /**
3410  * motion compensation of a single macroblock
3411  * @param s context
3412  * @param dest_y luma destination pointer
3413  * @param dest_cb chroma cb/u destination pointer
3414  * @param dest_cr chroma cr/v destination pointer
3415  * @param dir direction (0->forward, 1->backward)
3416  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3417  * @param pix_op motion compensation function (average or put normally)
3418  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3419  */
3420 static inline void MPV_motion_lowres(MpegEncContext *s, 
3421                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3422                               int dir, uint8_t **ref_picture, 
3423                               h264_chroma_mc_func *pix_op)
3424 {
3425     int mx, my;
3426     int mb_x, mb_y, i;
3427     const int lowres= s->avctx->lowres;
3428     const int block_s= 8>>lowres;    
3429
3430     mb_x = s->mb_x;
3431     mb_y = s->mb_y;
3432
3433     switch(s->mv_type) {
3434     case MV_TYPE_16X16:
3435         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
3436                     0, 0, 0,
3437                     ref_picture, pix_op,
3438                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3439         break;
3440     case MV_TYPE_8X8:
3441         mx = 0;
3442         my = 0;
3443             for(i=0;i<4;i++) {
3444                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3445                             ref_picture[0], 0, 0,
3446                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3447                             s->width, s->height, s->linesize,
3448                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3449                             block_s, block_s, pix_op,
3450                             s->mv[dir][i][0], s->mv[dir][i][1]);
3451
3452                 mx += s->mv[dir][i][0];
3453                 my += s->mv[dir][i][1];
3454             }
3455
3456         if(!(s->flags&CODEC_FLAG_GRAY))
3457             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3458         break;
3459     case MV_TYPE_FIELD:
3460         if (s->picture_structure == PICT_FRAME) {
3461             /* top field */       
3462             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3463                         1, 0, s->field_select[dir][0],
3464                         ref_picture, pix_op,
3465                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3466             /* bottom field */
3467             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3468                         1, 1, s->field_select[dir][1],
3469                         ref_picture, pix_op,
3470                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3471         } else {
3472             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3473                 ref_picture= s->current_picture_ptr->data;
3474             } 
3475
3476             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3477                         0, 0, s->field_select[dir][0],
3478                         ref_picture, pix_op,
3479                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3480         }
3481         break;
3482     case MV_TYPE_16X8:
3483         for(i=0; i<2; i++){
3484             uint8_t ** ref2picture;
3485
3486             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3487                 ref2picture= ref_picture;
3488             }else{
3489                 ref2picture= s->current_picture_ptr->data;
3490             } 
3491
3492             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
3493                         0, 0, s->field_select[dir][i],
3494                         ref2picture, pix_op,
3495                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3496                 
3497             dest_y += 2*block_s*s->linesize;
3498             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3499             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3500         }        
3501         break;
3502     case MV_TYPE_DMV:
3503         if(s->picture_structure == PICT_FRAME){
3504             for(i=0; i<2; i++){
3505                 int j;
3506                 for(j=0; j<2; j++){
3507                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3508                                 1, j, j^i,
3509                                 ref_picture, pix_op,
3510                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3511                 }
3512                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3513             }
3514         }else{
3515             for(i=0; i<2; i++){
3516                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
3517                             0, 0, s->picture_structure != i+1,
3518                             ref_picture, pix_op,
3519                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3520
3521                 // after put we make avg of the same block
3522                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3523
3524                 //the opposite parity is always in the same frame if this is the second field
3525                 if(!s->first_field){
3526                     ref_picture = s->current_picture_ptr->data;    
3527                 }
3528             }
3529         }
3530     break;
3531     default: assert(0);
3532     }
3533 }
3534
3535 /* put block[] to dest[] */
3536 static inline void put_dct(MpegEncContext *s, 
3537                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3538 {
3539     s->dct_unquantize_intra(s, block, i, qscale);
3540     s->dsp.idct_put (dest, line_size, block);
3541 }
3542
3543 /* add block[] to dest[] */
3544 static inline void add_dct(MpegEncContext *s, 
3545                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3546 {
3547     if (s->block_last_index[i] >= 0) {
3548         s->dsp.idct_add (dest, line_size, block);
3549     }
3550 }
3551
3552 static inline void add_dequant_dct(MpegEncContext *s, 
3553                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3554 {
3555     if (s->block_last_index[i] >= 0) {
3556         s->dct_unquantize_inter(s, block, i, qscale);
3557
3558         s->dsp.idct_add (dest, line_size, block);
3559     }
3560 }
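/*
 * The three helpers above cover the reconstruction cases: intra blocks are
 * dequantized and written with idct_put (they replace dest[]), inter blocks are
 * added on top of the motion compensated prediction with idct_add, and a
 * block_last_index < 0 means the block has no coded coefficients so the IDCT
 * can be skipped entirely.  Conceptual per-pixel sketch of the inter ("add")
 * path (illustration only, not compiled):
 */
#if 0
static void add_residual_example(uint8_t *dest, const int16_t *residual,
                                 int line_size)
{
    int x, y;
    for (y = 0; y < 8; y++)
        for (x = 0; x < 8; x++) {
            int v = dest[y*line_size + x] + residual[y*8 + x];
            dest[y*line_size + x] = v < 0 ? 0 : v > 255 ? 255 : v; /* clamp */
        }
}
#endif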
3561
3562 /**
3563  * cleans dc, ac, coded_block for the current non intra MB
3564  */
3565 void ff_clean_intra_table_entries(MpegEncContext *s)
3566 {
3567     int wrap = s->b8_stride;
3568     int xy = s->block_index[0];
3569     
3570     s->dc_val[0][xy           ] = 
3571     s->dc_val[0][xy + 1       ] = 
3572     s->dc_val[0][xy     + wrap] =
3573     s->dc_val[0][xy + 1 + wrap] = 1024;
3574     /* ac pred */
3575     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3576     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3577     if (s->msmpeg4_version>=3) {
3578         s->coded_block[xy           ] =
3579         s->coded_block[xy + 1       ] =
3580         s->coded_block[xy     + wrap] =
3581         s->coded_block[xy + 1 + wrap] = 0;
3582     }
3583     /* chroma */
3584     wrap = s->mb_stride;
3585     xy = s->mb_x + s->mb_y * wrap;
3586     s->dc_val[1][xy] =
3587     s->dc_val[2][xy] = 1024;
3588     /* ac pred */
3589     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3590     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3591     
3592     s->mbintra_table[xy]= 0;
3593 }
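/*
 * The luma DC/AC prediction tables are laid out on a grid of 8x8 blocks with
 * row stride s->b8_stride, so the four luma blocks of one macroblock sit at
 * xy, xy+1, xy+wrap and xy+1+wrap, while the chroma tables use the coarser
 * macroblock grid (s->mb_stride).  Index arithmetic, spelled out with the same
 * formula used for mot_xy in MPV_motion() (illustration only, not compiled):
 */
#if 0
static int luma_block_index_example(int mb_x, int mb_y, int b8_stride)
{
    int xy = 2*mb_x + 2*mb_y*b8_stride;   /* top-left 8x8 luma block of the MB */
    /* the other three luma blocks are xy+1, xy+b8_stride and xy+b8_stride+1  */
    return xy;
}
#endif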
3594
3595 /* generic function called after a macroblock has been parsed by the
3596    decoder or after it has been encoded by the encoder.
3597
3598    Important variables used:
3599    s->mb_intra : true if intra macroblock
3600    s->mv_dir   : motion vector direction
3601    s->mv_type  : motion vector type
3602    s->mv       : motion vector
3603    s->interlaced_dct : true if interlaced dct used (mpeg2)
3604  */
3605 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3606 {
3607     int mb_x, mb_y;
3608     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3609 #ifdef HAVE_XVMC
3610     if(s->avctx->xvmc_acceleration){
3611         XVMC_decode_mb(s);//xvmc uses pblocks
3612         return;
3613     }
3614 #endif
3615
3616     mb_x = s->mb_x;
3617     mb_y = s->mb_y;
3618
3619     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3620        /* save DCT coefficients */
3621        int i,j;
3622        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3623        for(i=0; i<6; i++)
3624            for(j=0; j<64; j++)
3625                *dct++ = block[i][s->dsp.idct_permutation[j]];
3626     }
3627
3628     s->current_picture.qscale_table[mb_xy]= s->qscale;
3629
3630     /* update DC predictors for P macroblocks */
3631     if (!s->mb_intra) {
3632         if (s->h263_pred || s->h263_aic) {
3633             if(s->mbintra_table[mb_xy])
3634                 ff_clean_intra_table_entries(s);
3635         } else {
3636             s->last_dc[0] =
3637             s->last_dc[1] =
3638             s->last_dc[2] = 128 << s->intra_dc_precision;
3639         }
3640     }
3641     else if (s->h263_pred || s->h263_aic)
3642         s->mbintra_table[mb_xy]=1;
3643
3644     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3645         uint8_t *dest_y, *dest_cb, *dest_cr;
3646         int dct_linesize, dct_offset;
3647         op_pixels_func (*op_pix)[4];
3648         qpel_mc_func (*op_qpix)[16];
3649         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3650         const int uvlinesize= s->current_picture.linesize[1];
3651         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3652         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3653
3654         /* avoid copy if macroblock skipped in last frame too */
3655         /* only do this while decoding, as the buffers may get trashed a bit during encoding */
3656         if(!s->encoding){
3657             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3658             const int age= s->current_picture.age;
3659
3660             assert(age);
3661
3662             if (s->mb_skiped) {
3663                 s->mb_skiped= 0;
3664                 assert(s->pict_type!=I_TYPE);
3665  
3666                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3667                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3668
3669                 /* if it was skipped previously too, then there is nothing to do! */
3670                 if (*mbskip_ptr >= age && s->current_picture.reference){
3671                     return;
3672                 }
3673             } else if(!s->current_picture.reference){
3674                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3675                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3676             } else{
3677                 *mbskip_ptr = 0; /* not skipped */
3678             }
3679         }
3680         
3681         dct_linesize = linesize << s->interlaced_dct;
3682         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3683         
3684         if(readable){
3685             dest_y=  s->dest[0];
3686             dest_cb= s->dest[1];
3687             dest_cr= s->dest[2];
3688         }else{
3689             dest_y = s->b_scratchpad;
3690             dest_cb= s->b_scratchpad+16*linesize;
3691             dest_cr= s->b_scratchpad+32*linesize;
3692         }
3693
3694         if (!s->mb_intra) {
3695             /* motion handling */
3696             /* decoding or more than one mb_type (MC was already done otherwise) */
3697             if(!s->encoding){
3698                 if(lowres_flag){
3699                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3700
3701                     if (s->mv_dir & MV_DIR_FORWARD) {
3702                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3703                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3704                     }
3705                     if (s->mv_dir & MV_DIR_BACKWARD) {
3706                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3707                     }
3708                 }else{
3709                     if ((!s->no_rounding) || s->pict_type==B_TYPE){                
3710                         op_pix = s->dsp.put_pixels_tab;
3711                         op_qpix= s->dsp.put_qpel_pixels_tab;
3712                     }else{
3713                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3714                         op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3715                     }
3716                     if (s->mv_dir & MV_DIR_FORWARD) {
3717                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3718                         op_pix = s->dsp.avg_pixels_tab;
3719                         op_qpix= s->dsp.avg_qpel_pixels_tab;
3720                     }
3721                     if (s->mv_dir & MV_DIR_BACKWARD) {
3722                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3723                     }
3724                 }
3725             }
3726
3727             /* skip dequant / idct if we are really late ;) */
3728             if(s->hurry_up>1) return;
3729
3730             /* add dct residue */
3731             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3732                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3733                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3734                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3735                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3736                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3737
3738                 if(!(s->flags&CODEC_FLAG_GRAY)){
3739                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3740                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3741                 }
3742             } else if(s->codec_id != CODEC_ID_WMV2){
3743                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
3744                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
3745                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
3746                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
3747
3748                 if(!(s->flags&CODEC_FLAG_GRAY)){
3749                     if(s->chroma_y_shift){//Chroma420
3750                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3751                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
3752                     }else{
3753                         //chroma422
3754                         dct_linesize = uvlinesize << s->interlaced_dct;
3755                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3756
3757                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
3758                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
3759                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
3760                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
3761                         if(!s->chroma_x_shift){//Chroma444
3762                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
3763                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
3764                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
3765                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
3766                         }
3767                     }
3768                 }//fi gray
3769             }
3770 #ifdef CONFIG_RISKY
3771             else{
3772                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
3773             }
3774 #endif
3775         } else {
3776             /* dct only in intra block */
3777             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
3778                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3779                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3780                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3781                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3782
3783                 if(!(s->flags&CODEC_FLAG_GRAY)){
3784                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3785                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3786                 }
3787             }else{
3788                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
3789                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
3790                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
3791                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
3792
3793                 if(!(s->flags&CODEC_FLAG_GRAY)){
3794                     if(s->chroma_y_shift){
3795                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
3796                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
3797                     }else{
3798
3799                         dct_linesize = uvlinesize << s->interlaced_dct;
3800                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3801
3802                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
3803                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
3804                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
3805                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
3806                         if(!s->chroma_x_shift){//Chroma444
3807                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
3808                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
3809                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
3810                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
3811                         }
3812                     }
3813                 }//gray
3814             }
3815         }
3816         if(!readable){
3817             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
3818             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
3819             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
3820         }
3821     }
3822 }
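/*
 * The mbskip_table bookkeeping in MPV_decode_mb_internal() is a copy-avoidance
 * trick: the table counts for how many consecutive frames a macroblock has been
 * skipped (saturating at 99), and current_picture.age says how many frames ago
 * the buffer now being written was last filled.  If the skip streak is at least
 * that long, the pixels already in the buffer are still correct and the whole
 * motion compensation / copy can be bypassed.  The test, in isolation
 * (illustration only, not compiled):
 */
#if 0
static int can_skip_reconstruction_example(int skip_streak, int age, int is_reference)
{
    return is_reference && skip_streak >= age;   /* same condition as above */
}
#endif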
3823
3824 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
3825     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
3826     else                  MPV_decode_mb_internal(s, block, 0);
3827 }
3828
3829 #ifdef CONFIG_ENCODERS
3830
3831 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
3832 {
3833     static const char tab[64]=
3834         {3,2,2,1,1,1,1,1,
3835          1,1,1,1,1,1,1,1,
3836          1,1,1,1,1,1,1,1,
3837          0,0,0,0,0,0,0,0,
3838          0,0,0,0,0,0,0,0,
3839          0,0,0,0,0,0,0,0,
3840          0,0,0,0,0,0,0,0,
3841          0,0,0,0,0,0,0,0};
3842     int score=0;
3843     int run=0;
3844     int i;
3845     DCTELEM *block= s->block[n];
3846     const int last_index= s->block_last_index[n];
3847     int skip_dc;
3848
3849     if(threshold<0){
3850         skip_dc=0;
3851         threshold= -threshold;
3852     }else
3853         skip_dc=1;
3854
3855     /* are all the coefficients which we could set to zero already zero? */
3856     if(last_index<=skip_dc - 1) return;
3857
3858     for(i=0; i<=last_index; i++){
3859         const int j = s->intra_scantable.permutated[i];
3860         const int level = ABS(block[j]);
3861         if(level==1){
3862             if(skip_dc && i==0) continue;
3863             score+= tab[run];
3864             run=0;
3865         }else if(level>1){
3866             return;
3867         }else{
3868             run++;
3869         }
3870     }
3871     if(score >= threshold) return;
3872     for(i=skip_dc; i<=last_index; i++){
3873         const int j = s->intra_scantable.permutated[i];
3874         block[j]=0;
3875     }
3876     if(block[0]) s->block_last_index[n]= 0;
3877     else         s->block_last_index[n]= -1;
3878 }
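/*
 * dct_single_coeff_elimination() is a cheap "is this block worth coding?"
 * heuristic: only blocks whose non-zero coefficients are all +-1 are
 * candidates, each such coefficient adds tab[run] to the score (more weight
 * near the start of the scan), and if the total stays below the threshold the
 * whole block (optionally keeping the DC) is zeroed.  Standalone sketch of the
 * scoring loop with a shortened weight table (illustration only, not compiled):
 */
#if 0
static int single_coeff_score_example(const int16_t *coeffs, int n)
{
    static const char tab_example[8] = {3, 2, 2, 1, 1, 1, 1, 1}; /* shortened */
    int i, run = 0, score = 0;
    for (i = 0; i < n; i++) {
        int level = coeffs[i] < 0 ? -coeffs[i] : coeffs[i];
        if      (level == 1) { score += tab_example[run < 7 ? run : 7]; run = 0; }
        else if (level  > 1) return INT_MAX;    /* big coefficient: keep block */
        else                 run++;
    }
    return score;                               /* compare against threshold  */
}
#endif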
3879
3880 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
3881 {
3882     int i;
3883     const int maxlevel= s->max_qcoeff;
3884     const int minlevel= s->min_qcoeff;
3885     int overflow=0;
3886     
3887     if(s->mb_intra){
3888         i=1; //skip clipping of intra dc
3889     }else
3890         i=0;
3891     
3892     for(;i<=last_index; i++){
3893         const int j= s->intra_scantable.permutated[i];
3894         int level = block[j];
3895        
3896         if     (level>maxlevel){
3897             level=maxlevel;
3898             overflow++;
3899         }else if(level<minlevel){
3900             level=minlevel;
3901             overflow++;
3902         }
3903         
3904         block[j]= level;
3905     }
3906     
3907     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
3908         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
3909 }
3910
3911 #endif //CONFIG_ENCODERS
3912
3913 /**
3914  * Call the draw_horiz_band() callback, if the application has set one, for the given slice.
3915  * @param h the normal height; it will be reduced automatically if needed for the last row
3916  */
3917 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
3918     if (s->avctx->draw_horiz_band) {
3919         AVFrame *src;
3920         int offset[4];
3921         
3922         if(s->picture_structure != PICT_FRAME){
3923             h <<= 1;
3924             y <<= 1;
3925             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
3926         }
3927
3928         h= FFMIN(h, s->avctx->height - y);
3929
3930         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) 
3931             src= (AVFrame*)s->current_picture_ptr;
3932         else if(s->last_picture_ptr)
3933             src= (AVFrame*)s->last_picture_ptr;
3934         else
3935             return;
3936             
3937         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
3938             offset[0]=
3939             offset[1]=
3940             offset[2]=
3941             offset[3]= 0;
3942         }else{
3943             offset[0]= y * s->linesize;
3944             offset[1]= 
3945             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
3946             offset[3]= 0;
3947         }
3948
3949         emms_c();
3950
3951         s->avctx->draw_horiz_band(s->avctx, src, offset,
3952                                   y, s->picture_structure, h);
3953     }
3954 }
3955
3956 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
3957     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3958     const int uvlinesize= s->current_picture.linesize[1];
3959     const int mb_size= 4 - s->avctx->lowres;
3960         
3961     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
3962     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
3963     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
3964     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
3965     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
3966     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
3967     //block_index is not used by mpeg2, so it is not affected by chroma_format
3968
3969     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
3970     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
3971     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
3972
3973     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
3974     {
3975         s->dest[0] += s->mb_y *   linesize << mb_size;
3976         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
3977         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
3978     }
3979 }
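/*
 * mb_size = 4 - lowres, i.e. one macroblock covers (1 << mb_size) = 16>>lowres
 * luma pixels, and the chroma destinations are additionally shifted down by
 * chroma_x_shift / chroma_y_shift.  Ignoring the -1 column offset and the
 * B-frame scratchpad case handled above, the luma destination of macroblock
 * (mb_x, mb_y) works out to (illustration only, not compiled):
 */
#if 0
static uint8_t *dest_luma_example(uint8_t *plane, int linesize,
                                  int mb_x, int mb_y, int lowres)
{
    const int mb_size = 4 - lowres;           /* log2 of the macroblock width */
    return plane + (mb_x << mb_size) + ((mb_y * linesize) << mb_size);
}
#endif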
3980
3981 #ifdef CONFIG_ENCODERS
3982
3983 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
3984     int x, y;
3985 //FIXME optimize
3986     for(y=0; y<8; y++){
3987         for(x=0; x<8; x++){
3988             int x2, y2;
3989             int sum=0;
3990             int sqr=0;
3991             int count=0;
3992
3993             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
3994                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
3995                     int v= ptr[x2 + y2*stride];
3996                     sum += v;
3997                     sqr += v*v;
3998                     count++;
3999                 }
4000             }
4001             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4002         }
4003     }
4004 }
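/*
 * The weight formula above is an integer approximation of 36 times the local
 * standard deviation: with n = count, S1 = sum and S2 = sqr,
 *     n*S2 - S1*S1 = n*n * (S2/n - (S1/n)*(S1/n)) = n*n * variance,
 * so (36*sqrt(n*S2 - S1*S1))/n == 36*sqrt(variance), i.e. 36 times the stddev
 * of the up-to-3x3 neighbourhood (clipped at the block border).  Floating point
 * check of the same identity (illustration only, not compiled):
 */
#if 0
static double visual_weight_squared_example(double sum, double sqr, int count)
{
    double variance = sqr/count - (sum/count)*(sum/count);
    return 36.0*36.0*variance;     /* square of 36*stddev == weight*weight    */
}
#endif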
4005
4006 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4007 {
4008     int16_t weight[6][64];
4009     DCTELEM orig[6][64];
4010     const int mb_x= s->mb_x;
4011     const int mb_y= s->mb_y;
4012     int i;
4013     int skip_dct[6];
4014     int dct_offset   = s->linesize*8; //default for progressive frames
4015     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4016     int wrap_y, wrap_c;
4017     
4018     for(i=0; i<6; i++) skip_dct[i]=0;
4019     
4020     if(s->adaptive_quant){
4021         const int last_qp= s->qscale;
4022         const int mb_xy= mb_x + mb_y*s->mb_stride;
4023
4024         s->lambda= s->lambda_table[mb_xy];
4025         update_qscale(s);
4026     
4027         if(!(s->flags&CODEC_FLAG_QP_RD)){
4028             s->dquant= s->qscale - last_qp;
4029
4030             if(s->out_format==FMT_H263){
4031                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
4032             
4033                 if(s->codec_id==CODEC_ID_MPEG4){        
4034                     if(!s->mb_intra){
4035                         if(s->pict_type == B_TYPE){
4036                             if(s->dquant&1) 
4037                                 s->dquant= (s->dquant/2)*2;
4038                             if(s->mv_dir&MV_DIRECT)
4039                                 s->dquant= 0;
4040                         }
4041                         if(s->mv_type==MV_TYPE_8X8)
4042                             s->dquant=0;
4043                     }
4044                 }
4045             }
4046         }
4047         ff_set_qscale(s, last_qp + s->dquant);
4048     }else if(s->flags&CODEC_FLAG_QP_RD)
4049         ff_set_qscale(s, s->qscale + s->dquant);
4050
4051     wrap_y = s->linesize;
4052     wrap_c = s->uvlinesize;
4053     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4054     ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
4055     ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
4056
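    /* If the MB extends past the picture border (width/height not a
     * multiple of 16), replicate edge pixels into the scratch buffer so
     * that full 16x16 luma and 8x8 chroma blocks can be read below. */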
4057     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4058         uint8_t *ebuf= s->edge_emu_buffer + 32;
4059         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4060         ptr_y= ebuf;
4061         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4062         ptr_cb= ebuf+18*wrap_y;
4063         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4064         ptr_cr= ebuf+18*wrap_y+8;
4065     }
4066
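    /* Intra MBs feed the source pixels straight into the DCT blocks; inter
     * MBs first motion-compensate the prediction into s->dest[] and code
     * the residual. Both paths may switch to field (interlaced) DCT when
     * the field-split arrangement scores better than the frame arrangement;
     * the -400 term biases the decision towards frame DCT. */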
4067     if (s->mb_intra) {
4068         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4069             int progressive_score, interlaced_score;
4070
4071             s->interlaced_dct=0;
4072             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8) 
4073                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4074
4075             if(progressive_score > 0){
4076                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8) 
4077                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4078                 if(progressive_score > interlaced_score){
4079                     s->interlaced_dct=1;
4080             
4081                     dct_offset= wrap_y;
4082                     wrap_y<<=1;
4083                 }
4084             }
4085         }
4086         
4087         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4088         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4089         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4090         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4091
4092         if(s->flags&CODEC_FLAG_GRAY){
4093             skip_dct[4]= 1;
4094             skip_dct[5]= 1;
4095         }else{
4096             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4097             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4098         }
4099     }else{
4100         op_pixels_func (*op_pix)[4];
4101         qpel_mc_func (*op_qpix)[16];
4102         uint8_t *dest_y, *dest_cb, *dest_cr;
4103
4104         dest_y  = s->dest[0];
4105         dest_cb = s->dest[1];
4106         dest_cr = s->dest[2];
4107
4108         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4109             op_pix = s->dsp.put_pixels_tab;
4110             op_qpix= s->dsp.put_qpel_pixels_tab;
4111         }else{
4112             op_pix = s->dsp.put_no_rnd_pixels_tab;
4113             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4114         }
4115
4116         if (s->mv_dir & MV_DIR_FORWARD) {
4117             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4118             op_pix = s->dsp.avg_pixels_tab;
4119             op_qpix= s->dsp.avg_qpel_pixels_tab;
4120         }
4121         if (s->mv_dir & MV_DIR_BACKWARD) {
4122             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4123         }
4124
4125         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4126             int progressive_score, interlaced_score;
4127
4128             s->interlaced_dct=0;
4129             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8) 
4130                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4131             
4132             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4133
4134             if(progressive_score>0){
4135                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8) 
4136                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4137             
4138                 if(progressive_score > interlaced_score){
4139                     s->interlaced_dct=1;
4140             
4141                     dct_offset= wrap_y;
4142                     wrap_y<<=1;
4143                 }
4144             }
4145         }
4146         
4147         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4148         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4149         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4150         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4151         
4152         if(s->flags&CODEC_FLAG_GRAY){
4153             skip_dct[4]= 1;
4154             skip_dct[5]= 1;
4155         }else{
4156             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4157             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4158         }
4159         /* pre quantization */         
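        /* If the motion-compensated variance of this MB is already small,
         * cheap 8x8 SAD checks mark residual blocks that are close to zero
         * so their DCT/quantization can be skipped entirely (skip_dct[]
         * leads to block_last_index == -1 below). */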
4160         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4161             //FIXME optimize
4162             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4163             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4164             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4165             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4166             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4167             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4168         }
4169     }
4170
4171     if(s->avctx->quantizer_noise_shaping){
4172         if(!skip_dct[0]) get_visual_weight(weight[0], ptr_y                 , wrap_y);
4173         if(!skip_dct[1]) get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
4174         if(!skip_dct[2]) get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4175         if(!skip_dct[3]) get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4176         if(!skip_dct[4]) get_visual_weight(weight[4], ptr_cb                , wrap_c);
4177         if(!skip_dct[5]) get_visual_weight(weight[5], ptr_cr                , wrap_c);
4178         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
4179     }
4180             
4181     /* DCT & quantize */
4182     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4183     {
4184         for(i=0;i<6;i++) {
4185             if(!skip_dct[i]){
4186                 int overflow;
4187                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4188             // FIXME we could decide to change the quantizer instead of clipping
4189             // JS: I don't think that would be a good idea, it could lower quality instead
4190             //     of improving it. Only INTRADC clipping deserves changes in the quantizer
4191                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4192             }else
4193                 s->block_last_index[i]= -1;
4194         }
4195         if(s->avctx->quantizer_noise_shaping){
4196             for(i=0;i<6;i++) {
4197                 if(!skip_dct[i]){
4198                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4199                 }
4200             }
4201         }
4202         
4203         if(s->luma_elim_threshold && !s->mb_intra)
4204             for(i=0; i<4; i++)
4205                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4206         if(s->chroma_elim_threshold && !s->mb_intra)
4207             for(i=4; i<6; i++)
4208                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4209
4210         if(s->flags & CODEC_FLAG_CBP_RD){
4211             for(i=0;i<6;i++) {
4212                 if(s->block_last_index[i] == -1)
4213                     s->coded_score[i]= INT_MAX/256;
4214             }
4215         }
4216     }
4217
4218     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4219         s->block_last_index[4]=
4220         s->block_last_index[5]= 0;
4221         s->block[4][0]=
4222         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4223     }
4224
4225     //non c quantize code returns incorrect block_last_index FIXME
4226     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4227         for(i=0; i<6; i++){
4228             int j;
4229             if(s->block_last_index[i]>0){
4230                 for(j=63; j>0; j--){
4231                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4232                 }
4233                 s->block_last_index[i]= j;
4234             }
4235         }
4236     }
4237
4238     /* huffman encode */
4239     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4240     case CODEC_ID_MPEG1VIDEO:
4241     case CODEC_ID_MPEG2VIDEO:
4242         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4243 #ifdef CONFIG_RISKY
4244     case CODEC_ID_MPEG4:
4245         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4246     case CODEC_ID_MSMPEG4V2:
4247     case CODEC_ID_MSMPEG4V3:
4248     case CODEC_ID_WMV1:
4249         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4250     case CODEC_ID_WMV2:
4251          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4252     case CODEC_ID_H261:
4253         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4254     case CODEC_ID_H263:
4255     case CODEC_ID_H263P:
4256     case CODEC_ID_FLV1:
4257     case CODEC_ID_RV10:
4258     case CODEC_ID_RV20:
4259         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4260 #endif
4261     case CODEC_ID_MJPEG:
4262         mjpeg_encode_mb(s, s->block); break;
4263     default:
4264         assert(0);
4265     }
4266 }
4267
4268 #endif //CONFIG_ENCODERS
4269
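/* Editorial note: releases every internally or user-allocated picture
 * buffer, clears the current/last/next picture pointers and resets the
 * parser state; presumably called when the codec is flushed (e.g. on a
 * seek). */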
4270 void ff_mpeg_flush(AVCodecContext *avctx){
4271     int i;
4272     MpegEncContext *s = avctx->priv_data;
4273     
4274     if(s==NULL || s->picture==NULL) 
4275         return;
4276     
4277     for(i=0; i<MAX_PICTURE_COUNT; i++){
4278        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4279                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4280         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4281     }
4282     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4283     
4284     s->mb_x= s->mb_y= 0;
4285     
4286     s->parse_context.state= -1;
4287     s->parse_context.frame_start_found= 0;
4288     s->parse_context.overread= 0;
4289     s->parse_context.overread_index= 0;
4290     s->parse_context.index= 0;
4291     s->parse_context.last_index= 0;
4292     s->bitstream_buffer_size=0;
4293 }
4294
4295 #ifdef CONFIG_ENCODERS
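/* Editorial note: appends 'length' bits from src to pb. Short or unaligned
 * inputs are copied 16 bits at a time through put_bits(); otherwise pb is
 * brought to 32-bit alignment by writing single bytes, the bulk of the data
 * is memcpy'd and skip_put_bytes() advances the write pointer. The trailing
 * length&15 bits are always written with put_bits(). */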
4296 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4297 {
4298     const uint16_t *srcw= (uint16_t*)src;
4299     int words= length>>4;
4300     int bits= length&15;
4301     int i;
4302
4303     if(length==0) return;
4304     
4305     if(words < 16){
4306         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4307     }else if(put_bits_count(pb)&7){
4308         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4309     }else{
4310         for(i=0; put_bits_count(pb)&31; i++)
4311             put_bits(pb, 8, src[i]);
4312         flush_put_bits(pb);
4313         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4314         skip_put_bytes(pb, 2*words-i);
4315     }
4316         
4317     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4318 }
4319
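/* Editorial note: copy_context_before_encode()/copy_context_after_encode()
 * save and restore the subset of MpegEncContext state that trial-encoding a
 * macroblock mutates (last MVs, DC predictors, bit statistics, MB mode and
 * the PutBitContexts), so encode_mb_hq() can re-encode the same MB with
 * different candidate modes. */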
4320 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4321     int i;
4322
4323     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4324
4325     /* mpeg1 */
4326     d->mb_skip_run= s->mb_skip_run;
4327     for(i=0; i<3; i++)
4328         d->last_dc[i]= s->last_dc[i];
4329     
4330     /* statistics */
4331     d->mv_bits= s->mv_bits;
4332     d->i_tex_bits= s->i_tex_bits;
4333     d->p_tex_bits= s->p_tex_bits;
4334     d->i_count= s->i_count;
4335     d->f_count= s->f_count;
4336     d->b_count= s->b_count;
4337     d->skip_count= s->skip_count;
4338     d->misc_bits= s->misc_bits;
4339     d->last_bits= 0;
4340
4341     d->mb_skiped= 0;
4342     d->qscale= s->qscale;
4343     d->dquant= s->dquant;
4344 }
4345
4346 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4347     int i;
4348
4349     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
4350     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4351     
4352     /* mpeg1 */
4353     d->mb_skip_run= s->mb_skip_run;
4354     for(i=0; i<3; i++)
4355         d->last_dc[i]= s->last_dc[i];
4356     
4357     /* statistics */
4358     d->mv_bits= s->mv_bits;
4359     d->i_tex_bits= s->i_tex_bits;
4360     d->p_tex_bits= s->p_tex_bits;
4361     d->i_count= s->i_count;
4362     d->f_count= s->f_count;
4363     d->b_count= s->b_count;
4364     d->skip_count= s->skip_count;
4365     d->misc_bits= s->misc_bits;
4366
4367     d->mb_intra= s->mb_intra;
4368     d->mb_skiped= s->mb_skiped;
4369     d->mv_type= s->mv_type;
4370     d->mv_dir= s->mv_dir;
4371     d->pb= s->pb;
4372     if(s->data_partitioning){
4373         d->pb2= s->pb2;
4374         d->tex_pb= s->tex_pb;
4375     }
4376     d->block= s->block;
4377     for(i=0; i<6; i++)
4378         d->block_last_index[i]= s->block_last_index[i];
4379     d->interlaced_dct= s->interlaced_dct;
4380     d->qscale= s->qscale;
4381 }
4382
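/* Editorial note: trial-encodes the current MB as candidate mode 'type'
 * into one of two alternating bit buffers. The cost is the number of bits
 * produced; with FF_MB_DECISION_RD the MB is also reconstructed, the bit
 * count is scaled by lambda2 and an SSE distortion term is added. If the
 * result beats *dmin, the context is copied into 'best' and next_block is
 * toggled so later candidates write into the other buffer, keeping the
 * winner's bits intact. */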
4383 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
4384                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4385                            int *dmin, int *next_block, int motion_x, int motion_y)
4386 {
4387     int score;
4388     uint8_t *dest_backup[3];
4389     
4390     copy_context_before_encode(s, backup, type);
4391
4392     s->block= s->blocks[*next_block];
4393     s->pb= pb[*next_block];
4394     if(s->data_partitioning){
4395         s->pb2   = pb2   [*next_block];
4396         s->tex_pb= tex_pb[*next_block];
4397     }
4398     
4399     if(*next_block){
4400         memcpy(dest_backup, s->dest, sizeof(s->dest));
4401         s->dest[0] = s->rd_scratchpad;
4402         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4403         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4404         assert(s->linesize >= 32); //FIXME
4405     }
4406
4407     encode_mb(s, motion_x, motion_y);
4408     
4409     score= put_bits_count(&s->pb);
4410     if(s->data_partitioning){
4411         score+= put_bits_count(&s->pb2);
4412         score+= put_bits_count(&s->tex_pb);
4413     }
4414    
4415     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4416         MPV_decode_mb(s, s->block);
4417
4418         score *= s->lambda2;
4419         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4420     }
4421     
4422     if(*next_block){
4423         memcpy(s->dest, dest_backup, sizeof(s->dest));
4424     }
4425
4426     if(score<*dmin){
4427         *dmin= score;
4428         *next_block^=1;
4429
4430         copy_context_after_encode(best, s, type);
4431     }
4432 }
4433                 
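/* Editorial note: plain sum of squared errors between two blocks, with
 * dsputil fast paths for the common 16x16 and 8x8 sizes; sse_mb() below
 * applies it (or the NSSE comparison) to the current macroblock for the
 * PSNR and RD computations. */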
4434 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4435     uint32_t *sq = squareTbl + 256;
4436     int acc=0;
4437     int x,y;
4438     
4439     if(w==16 && h==16) 
4440         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4441     else if(w==8 && h==8)
4442         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4443     
4444     for(y=0; y<h; y++){
4445         for(x=0; x<w; x++){
4446             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4447         } 
4448     }
4449     
4450     assert(acc>=0);
4451     
4452     return acc;
4453 }
4454
4455 static int sse_mb(MpegEncContext *s){
4456     int w= 16;
4457     int h= 16;
4458
4459     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4460     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4461
4462     if(w==16 && h==16)
4463       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4464         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4465                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4466                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4467       }else{
4468         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4469                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4470                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4471       }
4472     else
4473         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4474                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4475                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4476 }
4477
4478 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4479     MpegEncContext *s= arg;
4480
4481     
4482     s->me.pre_pass=1;
4483     s->me.dia_size= s->avctx->pre_dia_size;
4484     s->first_slice_line=1;
4485     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4486         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4487             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4488         }
4489         s->first_slice_line=0;
4490     }
4491     
4492     s->me.pre_pass=0;
4493     
4494     return 0;
4495 }
4496
4497 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4498     MpegEncContext *s= arg;
4499
4500     s->me.dia_size= s->avctx->dia_size;
4501     s->first_slice_line=1;
4502     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4503         s->mb_x=0; //for block init below
4504         ff_init_block_index(s);
4505         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4506             s->block_index[0]+=2;
4507             s->block_index[1]+=2;
4508             s->block_index[2]+=2;
4509             s->block_index[3]+=2;
4510             
4511             /* compute motion vector & mb_type and store in context */
4512             if(s->pict_type==B_TYPE)
4513                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4514             else
4515                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4516         }
4517         s->first_slice_line=0;
4518     }
4519     return 0;
4520 }
4521
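/* Editorial note: computes the spatial variance (mb_var) and mean (mb_mean)
 * of every 16x16 luma block in this slice: pix_sum gives the sum of the 256
 * pixels and pix_norm1 the sum of their squares, so the expression below is
 * essentially (sum_sq - sum^2/256)/256 with rounding and a small constant
 * bias; the total is accumulated in me.mb_var_sum_temp and later merged
 * into current_picture.mb_var_sum. */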
4522 static int mb_var_thread(AVCodecContext *c, void *arg){
4523     MpegEncContext *s= arg;
4524     int mb_x, mb_y;
4525
4526     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4527         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4528             int xx = mb_x * 16;
4529             int yy = mb_y * 16;
4530             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4531             int varc;
4532             int sum = s->dsp.pix_sum(pix, s->linesize);
4533     
4534             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4535
4536             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4537             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4538             s->me.mb_var_sum_temp    += varc;
4539         }
4540     }
4541     return 0;
4542 }
4543
4544 static void write_slice_end(MpegEncContext *s){
4545     if(s->codec_id==CODEC_ID_MPEG4){
4546         if(s->partitioned_frame){
4547             ff_mpeg4_merge_partitions(s);
4548         }
4549     
4550         ff_mpeg4_stuffing(&s->pb);
4551     }else if(s->out_format == FMT_MJPEG){
4552         ff_mjpeg_stuffing(&s->pb);
4553     }
4554
4555     align_put_bits(&s->pb);
4556     flush_put_bits(&s->pb);
4557 }
4558
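/* Editorial note: the per-slice macroblock coding loop. For every MB in the
 * rows assigned to this context it either codes the single candidate type
 * directly, or, when several candidate types and/or CODEC_FLAG_QP_RD are
 * active, trial-encodes each candidate with encode_mb_hq() and keeps the
 * cheapest. It also emits GOB / video-packet / slice headers in RTP mode,
 * accumulates the PSNR error sums and runs the H.263 loop filter when those
 * features are enabled. */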
4559 static int encode_thread(AVCodecContext *c, void *arg){
4560     MpegEncContext *s= arg;
4561     int mb_x, mb_y, pdif = 0;
4562     int i, j;
4563     MpegEncContext best_s, backup_s;
4564     uint8_t bit_buf[2][MAX_MB_BYTES];
4565     uint8_t bit_buf2[2][MAX_MB_BYTES];
4566     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4567     PutBitContext pb[2], pb2[2], tex_pb[2];
4568 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4569
4570     for(i=0; i<2; i++){
4571         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4572         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4573         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4574     }
4575
4576     s->last_bits= put_bits_count(&s->pb);
4577     s->mv_bits=0;
4578     s->misc_bits=0;
4579     s->i_tex_bits=0;
4580     s->p_tex_bits=0;
4581     s->i_count=0;
4582     s->f_count=0;
4583     s->b_count=0;
4584     s->skip_count=0;
4585
4586     for(i=0; i<3; i++){
4587         /* init last dc values */
4588         /* note: quant matrix value (8) is implied here */
4589         s->last_dc[i] = 128 << s->intra_dc_precision;
4590         
4591         s->current_picture_ptr->error[i] = 0;
4592     }
4593     s->mb_skip_run = 0;
4594     memset(s->last_mv, 0, sizeof(s->last_mv));
4595      
4596     s->last_mv_dir = 0;
4597
4598 #ifdef CONFIG_RISKY
4599     switch(s->codec_id){
4600     case CODEC_ID_H263:
4601     case CODEC_ID_H263P:
4602     case CODEC_ID_FLV1:
4603         s->gob_index = ff_h263_get_gob_height(s);
4604         break;
4605     case CODEC_ID_MPEG4:
4606         if(s->partitioned_frame)
4607             ff_mpeg4_init_partitions(s);
4608         break;
4609     }
4610 #endif
4611
4612     s->resync_mb_x=0;
4613     s->resync_mb_y=0; 
4614     s->first_slice_line = 1;
4615     s->ptr_lastgob = s->pb.buf;
4616     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4617 //    printf("row %d at %X\n", s->mb_y, (int)s);
4618         s->mb_x=0;
4619         s->mb_y= mb_y;
4620
4621         ff_set_qscale(s, s->qscale);
4622         ff_init_block_index(s);
4623         
4624         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4625             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4626             int mb_type= s->mb_type[xy];
4627 //            int d;
4628             int dmin= INT_MAX;
4629             int dir;
4630
4631             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4632                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4633                 return -1;
4634             }
4635             if(s->data_partitioning){
4636                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4637                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4638                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4639                     return -1;
4640                 }
4641             }
4642
4643             s->mb_x = mb_x;
4644             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4645             ff_update_block_index(s);
4646
4647             if(s->codec_id == CODEC_ID_H261){
4648                 ff_h261_reorder_mb_index(s);
4649                 xy= s->mb_y*s->mb_stride + s->mb_x;
4650                 mb_type= s->mb_type[xy];
4651             }
4652
4653             /* write gob / video packet header  */
4654 #ifdef CONFIG_RISKY
4655             if(s->rtp_mode){
4656                 int current_packet_size, is_gob_start;
4657                 
4658                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4659                 
4660                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0; 
4661                 
4662                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4663                 
4664                 switch(s->codec_id){
4665                 case CODEC_ID_H263:
4666                 case CODEC_ID_H263P:
4667                     if(!s->h263_slice_structured)
4668                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4669                     break;
4670                 case CODEC_ID_MPEG2VIDEO:
4671                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
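                    /* fall through: MPEG-2 also applies the mb_skip_run check below */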
4672                 case CODEC_ID_MPEG1VIDEO:
4673                     if(s->mb_skip_run) is_gob_start=0;
4674                     break;
4675                 }
4676
4677                 if(is_gob_start){
4678                     if(s->start_mb_y != mb_y || mb_x!=0){
4679                         write_slice_end(s);
4680
4681                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4682                             ff_mpeg4_init_partitions(s);
4683                         }
4684                     }
4685                 
4686                     assert((put_bits_count(&s->pb)&7) == 0);
4687                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4688                     
4689                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4690                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
4691                         int d= 100 / s->avctx->error_rate;
4692                         if(r % d == 0){
4693                             current_packet_size=0;
4694 #ifndef ALT_BITSTREAM_WRITER
4695                             s->pb.buf_ptr= s->ptr_lastgob;
4696 #endif
4697                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4698                         }
4699                     }
4700         
4701                     if (s->avctx->rtp_callback)
4702                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, 0);
4703                     
4704                     switch(s->codec_id){
4705                     case CODEC_ID_MPEG4:
4706                         ff_mpeg4_encode_video_packet_header(s);
4707                         ff_mpeg4_clean_buffers(s);
4708                     break;
4709                     case CODEC_ID_MPEG1VIDEO:
4710                     case CODEC_ID_MPEG2VIDEO:
4711                         ff_mpeg1_encode_slice_header(s);
4712                         ff_mpeg1_clean_buffers(s);
4713                     break;
4714                     case CODEC_ID_H263:
4715                     case CODEC_ID_H263P:
4716                         h263_encode_gob_header(s, mb_y);                       
4717                     break;
4718                     }
4719
4720                     if(s->flags&CODEC_FLAG_PASS1){
4721                         int bits= put_bits_count(&s->pb);
4722                         s->misc_bits+= bits - s->last_bits;
4723                         s->last_bits= bits;
4724                     }
4725     
4726                     s->ptr_lastgob += current_packet_size;
4727                     s->first_slice_line=1;
4728                     s->resync_mb_x=mb_x;
4729                     s->resync_mb_y=mb_y;
4730                 }
4731             }
4732 #endif
4733
4734             if(  (s->resync_mb_x   == s->mb_x)
4735                && s->resync_mb_y+1 == s->mb_y){
4736                 s->first_slice_line=0; 
4737             }
4738
4739             s->mb_skiped=0;
4740             s->dquant=0; //only for QP_RD
4741
4742             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
4743                 int next_block=0;
4744                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4745
4746                 copy_context_before_encode(&backup_s, s, -1);
4747                 backup_s.pb= s->pb;
4748                 best_s.data_partitioning= s->data_partitioning;
4749                 best_s.partitioned_frame= s->partitioned_frame;
4750                 if(s->data_partitioning){
4751                     backup_s.pb2= s->pb2;
4752                     backup_s.tex_pb= s->tex_pb;
4753                 }
4754
4755                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
4756                     s->mv_dir = MV_DIR_FORWARD;
4757                     s->mv_type = MV_TYPE_16X16;
4758                     s->mb_intra= 0;
4759                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4760                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4761                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb, 
4762                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4763                 }
4764                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){ 
4765                     s->mv_dir = MV_DIR_FORWARD;
4766                     s->mv_type = MV_TYPE_FIELD;
4767                     s->mb_intra= 0;
4768                     for(i=0; i<2; i++){
4769                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4770                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4771                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4772                     }
4773                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb, 
4774                                  &dmin, &next_block, 0, 0);
4775                 }
4776                 if(mb_type&CANDIDATE_MB_TYPE_SKIPED){
4777                     s->mv_dir = MV_DIR_FORWARD;
4778                     s->mv_type = MV_TYPE_16X16;
4779                     s->mb_intra= 0;
4780                     s->mv[0][0][0] = 0;
4781                     s->mv[0][0][1] = 0;
4782                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPED, pb, pb2, tex_pb, 
4783                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4784                 }
4785                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){                 
4786                     s->mv_dir = MV_DIR_FORWARD;
4787                     s->mv_type = MV_TYPE_8X8;
4788                     s->mb_intra= 0;
4789                     for(i=0; i<4; i++){
4790                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4791                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4792                     }
4793                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb, 
4794                                  &dmin, &next_block, 0, 0);
4795                 }
4796                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
4797                     s->mv_dir = MV_DIR_FORWARD;
4798                     s->mv_type = MV_TYPE_16X16;
4799                     s->mb_intra= 0;
4800                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4801                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4802                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb, 
4803                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4804                 }
4805                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
4806                     s->mv_dir = MV_DIR_BACKWARD;
4807                     s->mv_type = MV_TYPE_16X16;
4808                     s->mb_intra= 0;
4809                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4810                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4811                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
4812                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
4813                 }
4814                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
4815                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4816                     s->mv_type = MV_TYPE_16X16;
4817                     s->mb_intra= 0;
4818                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4819                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4820                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4821                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4822                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb, 
4823                                  &dmin, &next_block, 0, 0);
4824                 }
4825                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
4826                     int mx= s->b_direct_mv_table[xy][0];
4827                     int my= s->b_direct_mv_table[xy][1];
4828                     
4829                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4830                     s->mb_intra= 0;
4831 #ifdef CONFIG_RISKY
4832                     ff_mpeg4_set_direct_mv(s, mx, my);
4833 #endif
4834                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, 
4835                                  &dmin, &next_block, mx, my);
4836                 }
4837                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){ 
4838                     s->mv_dir = MV_DIR_FORWARD;
4839                     s->mv_type = MV_TYPE_FIELD;
4840                     s->mb_intra= 0;
4841                     for(i=0; i<2; i++){
4842                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
4843                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
4844                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
4845                     }
4846                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb, 
4847                                  &dmin, &next_block, 0, 0);
4848                 }
4849                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){ 
4850                     s->mv_dir = MV_DIR_BACKWARD;
4851                     s->mv_type = MV_TYPE_FIELD;
4852                     s->mb_intra= 0;
4853                     for(i=0; i<2; i++){
4854                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
4855                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
4856                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
4857                     }
4858                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb, 
4859                                  &dmin, &next_block, 0, 0);
4860                 }
4861                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){ 
4862                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4863                     s->mv_type = MV_TYPE_FIELD;
4864                     s->mb_intra= 0;
4865                     for(dir=0; dir<2; dir++){
4866                         for(i=0; i<2; i++){
4867                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
4868                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
4869                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
4870                         }
4871                     }
4872                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb, 
4873                                  &dmin, &next_block, 0, 0);
4874                 }
4875                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
4876                     s->mv_dir = 0;
4877                     s->mv_type = MV_TYPE_16X16;
4878                     s->mb_intra= 1;
4879                     s->mv[0][0][0] = 0;
4880                     s->mv[0][0][1] = 0;
4881                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb, 
4882                                  &dmin, &next_block, 0, 0);
4883                     if(s->h263_pred || s->h263_aic){
4884                         if(best_s.mb_intra)
4885                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
4886                         else
4887                             ff_clean_intra_table_entries(s); //old mode?
4888                     }
4889                 }
4890
4891                 if(s->flags & CODEC_FLAG_QP_RD){
4892                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
4893                         const int last_qp= backup_s.qscale;
4894                         int dquant, dir, qp, dc[6];
4895                         DCTELEM ac[6][16];
4896                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
4897                         
4898                         assert(backup_s.dquant == 0);
4899
4900                         //FIXME intra
4901                         s->mv_dir= best_s.mv_dir;
4902                         s->mv_type = MV_TYPE_16X16;
4903                         s->mb_intra= best_s.mb_intra;
4904                         s->mv[0][0][0] = best_s.mv[0][0][0];
4905                         s->mv[0][0][1] = best_s.mv[0][0][1];
4906                         s->mv[1][0][0] = best_s.mv[1][0][0];
4907                         s->mv[1][0][1] = best_s.mv[1][0][1];
4908                         
4909                         dir= s->pict_type == B_TYPE ? 2 : 1;
4910                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
4911                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
4912                             qp= last_qp + dquant;
4913                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
4914                                 break;
4915                             backup_s.dquant= dquant;
4916                             if(s->mb_intra && s->dc_val[0]){
4917                                 for(i=0; i<6; i++){
4918                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
4919                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
4920                                 }
4921                             }
4922
4923                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, 
4924                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
4925                             if(best_s.qscale != qp){
4926                                 if(s->mb_intra && s->dc_val[0]){
4927                                     for(i=0; i<6; i++){
4928                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
4929                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
4930                                     }
4931                                 }
4932                                 if(dir > 0 && dquant==dir){
4933                                     dquant= 0;
4934                                     dir= -dir;
4935                                 }else
4936                                     break;
4937                             }
4938                         }
4939                         qp= best_s.qscale;
4940                         s->current_picture.qscale_table[xy]= qp;
4941                     }
4942                 }
4943
4944                 copy_context_after_encode(s, &best_s, -1);
4945                 
4946                 pb_bits_count= put_bits_count(&s->pb);
4947                 flush_put_bits(&s->pb);
4948                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
4949                 s->pb= backup_s.pb;
4950                 
4951                 if(s->data_partitioning){
4952                     pb2_bits_count= put_bits_count(&s->pb2);
4953                     flush_put_bits(&s->pb2);
4954                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
4955                     s->pb2= backup_s.pb2;
4956                     
4957                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
4958                     flush_put_bits(&s->tex_pb);
4959                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
4960                     s->tex_pb= backup_s.tex_pb;
4961                 }
4962                 s->last_bits= put_bits_count(&s->pb);
4963                
4964 #ifdef CONFIG_RISKY
4965                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
4966                     ff_h263_update_motion_val(s);
4967 #endif
4968         
4969                 if(next_block==0){ //FIXME 16 vs linesize16
4970                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
4971                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
4972                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
4973                 }
4974
4975                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
4976                     MPV_decode_mb(s, s->block);
4977             } else {
4978                 int motion_x, motion_y;
4979                 s->mv_type=MV_TYPE_16X16;
4980                 // only one MB-Type possible
4981                 
4982                 switch(mb_type){
4983                 case CANDIDATE_MB_TYPE_INTRA:
4984                     s->mv_dir = 0;
4985                     s->mb_intra= 1;
4986                     motion_x= s->mv[0][0][0] = 0;
4987                     motion_y= s->mv[0][0][1] = 0;
4988                     break;
4989                 case CANDIDATE_MB_TYPE_INTER:
4990                     s->mv_dir = MV_DIR_FORWARD;
4991                     s->mb_intra= 0;
4992                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
4993                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
4994                     break;
4995                 case CANDIDATE_MB_TYPE_INTER_I:
4996                     s->mv_dir = MV_DIR_FORWARD;
4997                     s->mv_type = MV_TYPE_FIELD;
4998                     s->mb_intra= 0;
4999                     for(i=0; i<2; i++){
5000                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5001                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5002                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5003                     }
5004                     motion_x = motion_y = 0;
5005                     break;
5006                 case CANDIDATE_MB_TYPE_INTER4V:
5007                     s->mv_dir = MV_DIR_FORWARD;
5008                     s->mv_type = MV_TYPE_8X8;
5009                     s->mb_intra= 0;
5010                     for(i=0; i<4; i++){
5011                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5012                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5013                     }
5014                     motion_x= motion_y= 0;
5015                     break;
5016                 case CANDIDATE_MB_TYPE_DIRECT:
5017                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5018                     s->mb_intra= 0;
5019                     motion_x=s->b_direct_mv_table[xy][0];
5020                     motion_y=s->b_direct_mv_table[xy][1];
5021 #ifdef CONFIG_RISKY
5022                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5023 #endif
5024                     break;
5025                 case CANDIDATE_MB_TYPE_BIDIR:
5026                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5027                     s->mb_intra= 0;
5028                     motion_x=0;
5029                     motion_y=0;
5030                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5031                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5032                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5033                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5034                     break;
5035                 case CANDIDATE_MB_TYPE_BACKWARD:
5036                     s->mv_dir = MV_DIR_BACKWARD;
5037                     s->mb_intra= 0;
5038                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5039                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5040                     break;
5041                 case CANDIDATE_MB_TYPE_FORWARD:
5042                     s->mv_dir = MV_DIR_FORWARD;
5043                     s->mb_intra= 0;
5044                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5045                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5046 //                    printf(" %d %d ", motion_x, motion_y);
5047                     break;
5048                 case CANDIDATE_MB_TYPE_FORWARD_I:
5049                     s->mv_dir = MV_DIR_FORWARD;
5050                     s->mv_type = MV_TYPE_FIELD;
5051                     s->mb_intra= 0;
5052                     for(i=0; i<2; i++){
5053                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5054                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5055                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5056                     }
5057                     motion_x=motion_y=0;
5058                     break;
5059                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5060                     s->mv_dir = MV_DIR_BACKWARD;
5061                     s->mv_type = MV_TYPE_FIELD;
5062                     s->mb_intra= 0;
5063                     for(i=0; i<2; i++){
5064                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5065                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5066                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5067                     }
5068                     motion_x=motion_y=0;
5069                     break;
5070                 case CANDIDATE_MB_TYPE_BIDIR_I:
5071                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5072                     s->mv_type = MV_TYPE_FIELD;
5073                     s->mb_intra= 0;
5074                     for(dir=0; dir<2; dir++){
5075                         for(i=0; i<2; i++){
5076                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5077                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5078                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5079                         }
5080                     }
5081                     motion_x=motion_y=0;
5082                     break;
5083                 default:
5084                     motion_x=motion_y=0; //gcc warning fix
5085                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5086                 }
5087
5088                 encode_mb(s, motion_x, motion_y);
5089
5090                 // RAL: Update last macroblock type
5091                 s->last_mv_dir = s->mv_dir;
5092             
5093 #ifdef CONFIG_RISKY
5094                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5095                     ff_h263_update_motion_val(s);
5096 #endif
5097                 
5098                 MPV_decode_mb(s, s->block);
5099             }
5100
5101             /* clean the MV table in I/P/S frames; it is used for direct mode in B-frames */
5102             if(s->mb_intra /* && I,P,S_TYPE */){
5103                 s->p_mv_table[xy][0]=0;
5104                 s->p_mv_table[xy][1]=0;
5105             }
5106             
5107             if(s->flags&CODEC_FLAG_PSNR){
5108                 int w= 16;
5109                 int h= 16;
5110
5111                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5112                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5113
5114                 s->current_picture_ptr->error[0] += sse(
5115                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5116                     s->dest[0], w, h, s->linesize);
5117                 s->current_picture_ptr->error[1] += sse(
5118                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5119                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5120                 s->current_picture_ptr->error[2] += sse(
5121                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5122                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5123             }
5124             if(s->loop_filter){
5125                 if(s->out_format == FMT_H263)
5126                     ff_h263_loop_filter(s);
5127             }
5128 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5129         }
5130     }
5131
5132 #ifdef CONFIG_RISKY
5133     //not beautiful here, but we must write it before flushing, so it has to be here
5134     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5135         msmpeg4_encode_ext_header(s);
5136 #endif
5137
5138     write_slice_end(s);
5139
5140     /* Send the last GOB if RTP */    
5141     if (s->avctx->rtp_callback) {
5142         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5143         /* Call the RTP callback to send the last GOB */
5144         emms_c();
5145         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, 0);
5146     }
5147
5148     return 0;
5149 }
5150
5151 #define MERGE(field) dst->field += src->field; src->field=0
5152 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5153     MERGE(me.scene_change_score);
5154     MERGE(me.mc_mb_var_sum_temp);
5155     MERGE(me.mb_var_sum_temp);
5156 }
5157
5158 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5159     int i;
5160
5161     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5162     MERGE(dct_count[1]);
5163     MERGE(mv_bits);
5164     MERGE(i_tex_bits);
5165     MERGE(p_tex_bits);
5166     MERGE(i_count);
5167     MERGE(f_count);
5168     MERGE(b_count);
5169     MERGE(skip_count);
5170     MERGE(misc_bits);
5171     MERGE(error_count);
5172     MERGE(padding_bug_score);
5173
5174     if(dst->avctx->noise_reduction){
5175         for(i=0; i<64; i++){
5176             MERGE(dct_error_sum[0][i]);
5177             MERGE(dct_error_sum[1][i]);
5178         }
5179     }
5180     
5181     assert(put_bits_count(&src->pb) % 8 ==0);
5182     assert(put_bits_count(&dst->pb) % 8 ==0);
5183     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5184     flush_put_bits(&dst->pb);
5185 }
5186
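/* Editorial note: per-picture driver for the encoder threads. It runs
 * (pre-)motion estimation or, for I-frames, the variance analysis across
 * all thread contexts, merges their statistics, may promote a P-frame to an
 * I-frame on scene change, chooses f_code/b_code and clips overlong motion
 * vectors, runs rate control / adaptive quantization and, for MJPEG, folds
 * qscale into the intra quantization matrix. */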
5187 static void encode_picture(MpegEncContext *s, int picture_number)
5188 {
5189     int i;
5190     int bits;
5191
5192     s->picture_number = picture_number;
5193     
5194     /* Reset the average MB variance */
5195     s->me.mb_var_sum_temp    =
5196     s->me.mc_mb_var_sum_temp = 0;
5197
5198 #ifdef CONFIG_RISKY
5199     /* we need to initialize some time vars before we can encode b-frames */
5200     // RAL: Condition added for MPEG1VIDEO
5201     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5202         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5203 #endif
5204         
5205     s->me.scene_change_score=0;
5206     
5207 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
5208     
5209     if(s->pict_type==I_TYPE){
5210         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5211         else                        s->no_rounding=0;
5212     }else if(s->pict_type!=B_TYPE){
5213         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5214             s->no_rounding ^= 1;          
5215     }
5216     
5217     s->mb_intra=0; //for the rate distortion & bit compare functions
5218     for(i=1; i<s->avctx->thread_count; i++){
5219         ff_update_duplicate_context(s->thread_context[i], s);
5220     }
5221
5222     ff_init_me(s);
5223
5224     /* Estimate motion for every MB */
5225     if(s->pict_type != I_TYPE){
5226         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5227             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5228                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5229             }
5230         }
5231
5232         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5233     }else /* if(s->pict_type == I_TYPE) */{
5234         /* I-Frame */
5235         for(i=0; i<s->mb_stride*s->mb_height; i++)
5236             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5237         
5238         if(!s->fixed_qscale){
5239             /* finding spatial complexity for I-frame rate control */
5240             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5241         }
5242     }
5243     for(i=1; i<s->avctx->thread_count; i++){
5244         merge_context_after_me(s, s->thread_context[i]);
5245     }
5246     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5247     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5248     emms_c();
5249
5250     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5251         s->pict_type= I_TYPE;
5252         for(i=0; i<s->mb_stride*s->mb_height; i++)
5253             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5254 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5255     }
5256
5257     if(!s->umvplus){
5258         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5259             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5260
5261             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5262                 int a,b;
5263                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5264                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5265                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5266             }
5267                     
5268             ff_fix_long_p_mvs(s);
5269             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5270             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5271                 int j;
5272                 for(i=0; i<2; i++){
5273                     for(j=0; j<2; j++)
5274                         ff_fix_long_mvs(s, s->p_field_select_table[i], j, 
5275                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5276                 }
5277             }
5278         }
5279
5280         if(s->pict_type==B_TYPE){
5281             int a, b;
5282
5283             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5284             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5285             s->f_code = FFMAX(a, b);
5286
5287             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5288             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5289             s->b_code = FFMAX(a, b);
5290
5291             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5292             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5293             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5294             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5295             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5296                 int dir, j;
5297                 for(dir=0; dir<2; dir++){
5298                     for(i=0; i<2; i++){
5299                         for(j=0; j<2; j++){
5300                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) 
5301                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5302                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, 
5303                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5304                         }
5305                     }
5306                 }
5307             }
5308         }
5309     }
5310
5311     if (!s->fixed_qscale) 
5312         s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr
5313
5314     if(s->adaptive_quant){
5315 #ifdef CONFIG_RISKY
5316         switch(s->codec_id){
5317         case CODEC_ID_MPEG4:
5318             ff_clean_mpeg4_qscales(s);
5319             break;
5320         case CODEC_ID_H263:
5321         case CODEC_ID_H263P:
5322         case CODEC_ID_FLV1:
5323             ff_clean_h263_qscales(s);
5324             break;
5325         }
5326 #endif
5327
5328         s->lambda= s->lambda_table[0];
5329         //FIXME broken
5330     }else
5331         s->lambda= s->current_picture.quality;
5332 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5333     update_qscale(s);
5334     
5335     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) 
5336         s->qscale= 3; //reduce clipping problems
5337         
5338     if (s->out_format == FMT_MJPEG) {
5339         /* for mjpeg, we do include qscale in the matrix */
5340         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5341         for(i=1;i<64;i++){
5342             int j= s->dsp.idct_permutation[i];
5343
5344             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
5345         }
5346         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
5347                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5348         s->qscale= 8;
5349     }
5350     
5351     //FIXME var duplication
5352     s->current_picture_ptr->key_frame=
5353     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5354     s->current_picture_ptr->pict_type=
5355     s->current_picture.pict_type= s->pict_type;
5356
5357     if(s->current_picture.key_frame)
5358         s->picture_in_gop_number=0;
5359
5360     s->last_bits= put_bits_count(&s->pb);
5361     switch(s->out_format) {
5362     case FMT_MJPEG:
5363         mjpeg_picture_header(s);
5364         break;
5365 #ifdef CONFIG_RISKY
5366     case FMT_H261:
5367         ff_h261_encode_picture_header(s, picture_number);
5368         break;
5369     case FMT_H263:
5370         if (s->codec_id == CODEC_ID_WMV2) 
5371             ff_wmv2_encode_picture_header(s, picture_number);
5372         else if (s->h263_msmpeg4) 
5373             msmpeg4_encode_picture_header(s, picture_number);
5374         else if (s->h263_pred)
5375             mpeg4_encode_picture_header(s, picture_number);
5376         else if (s->codec_id == CODEC_ID_RV10) 
5377             rv10_encode_picture_header(s, picture_number);
5378         else if (s->codec_id == CODEC_ID_RV20) 
5379             rv20_encode_picture_header(s, picture_number);
5380         else if (s->codec_id == CODEC_ID_FLV1)
5381             ff_flv_encode_picture_header(s, picture_number);
5382         else
5383             h263_encode_picture_header(s, picture_number);
5384         break;
5385 #endif
5386     case FMT_MPEG1:
5387         mpeg1_encode_picture_header(s, picture_number);
5388         break;
5389     case FMT_H264:
5390         break;
5391     default:
5392         assert(0);
5393     }
5394     bits= put_bits_count(&s->pb);
5395     s->header_bits= bits - s->last_bits;
5396         
5397     for(i=1; i<s->avctx->thread_count; i++){
5398         update_duplicate_context_after_me(s->thread_context[i], s);
5399     }
5400     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5401     for(i=1; i<s->avctx->thread_count; i++){
5402         merge_context_after_encode(s, s->thread_context[i]);
5403     }
5404     emms_c();
5405 }
5406
5407 #endif //CONFIG_ENCODERS
5408
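/**
 * DCT-domain denoising: accumulates the absolute value of each coefficient
 * in dct_error_sum[] and shrinks the coefficient towards zero by the running
 * per-position offset in dct_offset[], clamping at zero so the sign never flips.
 */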
5409 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5410     const int intra= s->mb_intra;
5411     int i;
5412
5413     s->dct_count[intra]++;
5414
5415     for(i=0; i<64; i++){
5416         int level= block[i];
5417
5418         if(level){
5419             if(level>0){
5420                 s->dct_error_sum[intra][i] += level;
5421                 level -= s->dct_offset[intra][i];
5422                 if(level<0) level=0;
5423             }else{
5424                 s->dct_error_sum[intra][i] -= level;
5425                 level += s->dct_offset[intra][i];
5426                 if(level>0) level=0;
5427             }
5428             block[i]= level;
5429         }
5430     }
5431 }
5432
5433 #ifdef CONFIG_ENCODERS
5434
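/**
 * Rate-distortion optimized ("trellis") quantization.
 * For each scan position up to two candidate magnitudes are kept (the plain
 * quantizer output and one less); a survivor list over possible run lengths
 * then selects the combination minimizing distortion + lambda * VLC bit cost.
 * Returns the index of the last non-zero coefficient in scan order.
 */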
5435 static int dct_quantize_trellis_c(MpegEncContext *s, 
5436                         DCTELEM *block, int n,
5437                         int qscale, int *overflow){
5438     const int *qmat;
5439     const uint8_t *scantable= s->intra_scantable.scantable;
5440     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5441     int max=0;
5442     unsigned int threshold1, threshold2;
5443     int bias=0;
5444     int run_tab[65];
5445     int level_tab[65];
5446     int score_tab[65];
5447     int survivor[65];
5448     int survivor_count;
5449     int last_run=0;
5450     int last_level=0;
5451     int last_score= 0;
5452     int last_i;
5453     int coeff[2][64];
5454     int coeff_count[64];
5455     int qmul, qadd, start_i, last_non_zero, i, dc;
5456     const int esc_length= s->ac_esc_length;
5457     uint8_t * length;
5458     uint8_t * last_length;
5459     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5460         
5461     s->dsp.fdct (block);
5462     
5463     if(s->dct_error_sum)
5464         s->denoise_dct(s, block);
5465     qmul= qscale*16;
5466     qadd= ((qscale-1)|1)*8;
5467
5468     if (s->mb_intra) {
5469         int q;
5470         if (!s->h263_aic) {
5471             if (n < 4)
5472                 q = s->y_dc_scale;
5473             else
5474                 q = s->c_dc_scale;
5475             q = q << 3;
5476         } else{
5477             /* For AIC we skip quant/dequant of INTRADC */
5478             q = 1 << 3;
5479             qadd=0;
5480         }
5481             
5482         /* note: block[0] is assumed to be positive */
5483         block[0] = (block[0] + (q >> 1)) / q;
5484         start_i = 1;
5485         last_non_zero = 0;
5486         qmat = s->q_intra_matrix[qscale];
5487         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5488             bias= 1<<(QMAT_SHIFT-1);
5489         length     = s->intra_ac_vlc_length;
5490         last_length= s->intra_ac_vlc_last_length;
5491     } else {
5492         start_i = 0;
5493         last_non_zero = -1;
5494         qmat = s->q_inter_matrix[qscale];
5495         length     = s->inter_ac_vlc_length;
5496         last_length= s->inter_ac_vlc_last_length;
5497     }
5498     last_i= start_i;
5499
5500     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5501     threshold2= (threshold1<<1);
5502
5503     for(i=63; i>=start_i; i--) {
5504         const int j = scantable[i];
5505         int level = block[j] * qmat[j];
5506
5507         if(((unsigned)(level+threshold1))>threshold2){
5508             last_non_zero = i;
5509             break;
5510         }
5511     }
5512
5513     for(i=start_i; i<=last_non_zero; i++) {
5514         const int j = scantable[i];
5515         int level = block[j] * qmat[j];
5516
5517 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5518 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5519         if(((unsigned)(level+threshold1))>threshold2){
5520             if(level>0){
5521                 level= (bias + level)>>QMAT_SHIFT;
5522                 coeff[0][i]= level;
5523                 coeff[1][i]= level-1;
5524 //                coeff[2][k]= level-2;
5525             }else{
5526                 level= (bias - level)>>QMAT_SHIFT;
5527                 coeff[0][i]= -level;
5528                 coeff[1][i]= -level+1;
5529 //                coeff[2][k]= -level+2;
5530             }
5531             coeff_count[i]= FFMIN(level, 2);
5532             assert(coeff_count[i]);
5533             max |=level;
5534         }else{
5535             coeff[0][i]= (level>>31)|1;
5536             coeff_count[i]= 1;
5537         }
5538     }
5539     
5540     *overflow= s->max_qcoeff < max; //overflow might have happened
5541     
5542     if(last_non_zero < start_i){
5543         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5544         return last_non_zero;
5545     }
5546
5547     score_tab[start_i]= 0;
5548     survivor[0]= start_i;
5549     survivor_count= 1;
5550     
5551     for(i=start_i; i<=last_non_zero; i++){
5552         int level_index, j;
5553         const int dct_coeff= ABS(block[ scantable[i] ]);
5554         const int zero_distortion= dct_coeff*dct_coeff;
5555         int best_score=256*256*256*120;
5556         for(level_index=0; level_index < coeff_count[i]; level_index++){
5557             int distortion;
5558             int level= coeff[level_index][i];
5559             const int alevel= ABS(level);
5560             int unquant_coeff;
5561             
5562             assert(level);
5563
5564             if(s->out_format == FMT_H263){
5565                 unquant_coeff= alevel*qmul + qadd;
5566             }else{ //MPEG1
5567                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5568                 if(s->mb_intra){
5569                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5570                         unquant_coeff =   (unquant_coeff - 1) | 1;
5571                 }else{
5572                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5573                         unquant_coeff =   (unquant_coeff - 1) | 1;
5574                 }
5575                 unquant_coeff<<= 3;
5576             }
5577
5578             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
5579             level+=64;
5580             if((level&(~127)) == 0){
5581                 for(j=survivor_count-1; j>=0; j--){
5582                     int run= i - survivor[j];
5583                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5584                     score += score_tab[i-run];
5585                     
5586                     if(score < best_score){
5587                         best_score= score;
5588                         run_tab[i+1]= run;
5589                         level_tab[i+1]= level-64;
5590                     }
5591                 }
5592
5593                 if(s->out_format == FMT_H263){
5594                     for(j=survivor_count-1; j>=0; j--){
5595                         int run= i - survivor[j];
5596                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5597                         score += score_tab[i-run];
5598                         if(score < last_score){
5599                             last_score= score;
5600                             last_run= run;
5601                             last_level= level-64;
5602                             last_i= i+1;
5603                         }
5604                     }
5605                 }
5606             }else{
5607                 distortion += esc_length*lambda;
5608                 for(j=survivor_count-1; j>=0; j--){
5609                     int run= i - survivor[j];
5610                     int score= distortion + score_tab[i-run];
5611                     
5612                     if(score < best_score){
5613                         best_score= score;
5614                         run_tab[i+1]= run;
5615                         level_tab[i+1]= level-64;
5616                     }
5617                 }
5618
5619                 if(s->out_format == FMT_H263){
5620                   for(j=survivor_count-1; j>=0; j--){
5621                         int run= i - survivor[j];
5622                         int score= distortion + score_tab[i-run];
5623                         if(score < last_score){
5624                             last_score= score;
5625                             last_run= run;
5626                             last_level= level-64;
5627                             last_i= i+1;
5628                         }
5629                     }
5630                 }
5631             }
5632         }
5633         
5634         score_tab[i+1]= best_score;
5635
5636         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
5637         if(last_non_zero <= 27){
5638             for(; survivor_count; survivor_count--){
5639                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
5640                     break;
5641             }
5642         }else{
5643             for(; survivor_count; survivor_count--){
5644                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
5645                     break;
5646             }
5647         }
5648
5649         survivor[ survivor_count++ ]= i+1;
5650     }
5651
5652     if(s->out_format != FMT_H263){
5653         last_score= 256*256*256*120;
5654         for(i= survivor[0]; i<=last_non_zero + 1; i++){
5655             int score= score_tab[i];
5656             if(i) score += lambda*2; //FIXME exacter?
5657
5658             if(score < last_score){
5659                 last_score= score;
5660                 last_i= i;
5661                 last_level= level_tab[i];
5662                 last_run= run_tab[i];
5663             }
5664         }
5665     }
5666
5667     s->coded_score[n] = last_score;
5668     
5669     dc= ABS(block[0]);
5670     last_non_zero= last_i - 1;
5671     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5672     
5673     if(last_non_zero < start_i)
5674         return last_non_zero;
5675
5676     if(last_non_zero == 0 && start_i == 0){
5677         int best_level= 0;
5678         int best_score= dc * dc;
5679         
5680         for(i=0; i<coeff_count[0]; i++){
5681             int level= coeff[i][0];
5682             int alevel= ABS(level);
5683             int unquant_coeff, score, distortion;
5684
5685             if(s->out_format == FMT_H263){
5686                     unquant_coeff= (alevel*qmul + qadd)>>3;
5687             }else{ //MPEG1
5688                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
5689                     unquant_coeff =   (unquant_coeff - 1) | 1;
5690             }
5691             unquant_coeff = (unquant_coeff + 4) >> 3;
5692             unquant_coeff<<= 3 + 3;
5693
5694             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
5695             level+=64;
5696             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
5697             else                    score= distortion + esc_length*lambda;
5698
5699             if(score < best_score){
5700                 best_score= score;
5701                 best_level= level - 64;
5702             }
5703         }
5704         block[0]= best_level;
5705         s->coded_score[n] = best_score - dc*dc;
5706         if(best_level == 0) return -1;
5707         else                return last_non_zero;
5708     }
5709
5710     i= last_i;
5711     assert(last_level);
5712
5713     block[ perm_scantable[last_non_zero] ]= last_level;
5714     i -= last_run + 1;
5715     
5716     for(; i>start_i; i -= run_tab[i] + 1){
5717         block[ perm_scantable[i-1] ]= level_tab[i];
5718     }
5719
5720     return last_non_zero;
5721 }
5722
5723 //#define REFINE_STATS 1
5724 static int16_t basis[64][64];
5725
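/**
 * Fills basis[][] with the IDCT-permuted 8x8 DCT basis functions scaled by
 * 2^BASIS_SHIFT:
 *   basis[perm(8i+j)][8x+y] = round(0.25 * 2^BASIS_SHIFT * C(i) * C(j) *
 *                                   cos(pi*i*(x+0.5)/8) * cos(pi*j*(y+0.5)/8))
 * with C(0)=sqrt(0.5) and C(k)=1 otherwise. Used by dct_quantize_refine()
 * through the try_8x8basis()/add_8x8basis() dsputil hooks.
 */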
5726 static void build_basis(uint8_t *perm){
5727     int i, j, x, y;
5728     emms_c();
5729     for(i=0; i<8; i++){
5730         for(j=0; j<8; j++){
5731             for(y=0; y<8; y++){
5732                 for(x=0; x<8; x++){
5733                     double s= 0.25*(1<<BASIS_SHIFT);
5734                     int index= 8*i + j;
5735                     int perm_index= perm[index];
5736                     if(i==0) s*= sqrt(0.5);
5737                     if(j==0) s*= sqrt(0.5);
5738                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5739                 }
5740             }
5741         }
5742     }
5743 }
5744
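/**
 * Iterative quantization refinement (quantizer noise shaping).
 * Starting from an already quantized block, +-1 changes to individual
 * coefficients are tried repeatedly; each change is scored as the VLC bit
 * cost difference times lambda plus the change of the weighted spatial-domain
 * error (try_8x8basis), and the best change is applied via add_8x8basis
 * until no further improvement is found.
 * Returns the updated index of the last non-zero coefficient.
 */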
5745 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5746                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5747                         int n, int qscale){
5748     int16_t rem[64];
5749     DCTELEM d1[64] __align16;
5750     const int *qmat;
5751     const uint8_t *scantable= s->intra_scantable.scantable;
5752     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5753 //    unsigned int threshold1, threshold2;
5754 //    int bias=0;
5755     int run_tab[65];
5756     int prev_run=0;
5757     int prev_level=0;
5758     int qmul, qadd, start_i, last_non_zero, i, dc;
5759     uint8_t * length;
5760     uint8_t * last_length;
5761     int lambda;
5762     int rle_index, run, q, sum;
5763 #ifdef REFINE_STATS
5764 static int count=0;
5765 static int after_last=0;
5766 static int to_zero=0;
5767 static int from_zero=0;
5768 static int raise=0;
5769 static int lower=0;
5770 static int messed_sign=0;
5771 #endif
5772
5773     if(basis[0][0] == 0)
5774         build_basis(s->dsp.idct_permutation);
5775     
5776     qmul= qscale*2;
5777     qadd= (qscale-1)|1;
5778     if (s->mb_intra) {
5779         if (!s->h263_aic) {
5780             if (n < 4)
5781                 q = s->y_dc_scale;
5782             else
5783                 q = s->c_dc_scale;
5784         } else{
5785             /* For AIC we skip quant/dequant of INTRADC */
5786             q = 1;
5787             qadd=0;
5788         }
5789         q <<= RECON_SHIFT-3;
5790         /* note: block[0] is assumed to be positive */
5791         dc= block[0]*q;
5792 //        block[0] = (block[0] + (q >> 1)) / q;
5793         start_i = 1;
5794         qmat = s->q_intra_matrix[qscale];
5795 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5796 //            bias= 1<<(QMAT_SHIFT-1);
5797         length     = s->intra_ac_vlc_length;
5798         last_length= s->intra_ac_vlc_last_length;
5799     } else {
5800         dc= 0;
5801         start_i = 0;
5802         qmat = s->q_inter_matrix[qscale];
5803         length     = s->inter_ac_vlc_length;
5804         last_length= s->inter_ac_vlc_last_length;
5805     }
5806     last_non_zero = s->block_last_index[n];
5807
5808 #ifdef REFINE_STATS
5809 {START_TIMER
5810 #endif
5811     dc += (1<<(RECON_SHIFT-1));
5812     for(i=0; i<64; i++){
5813         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
5814     }
5815 #ifdef REFINE_STATS
5816 STOP_TIMER("memset rem[]")}
5817 #endif
5818     sum=0;
5819     for(i=0; i<64; i++){
5820         int one= 36;
5821         int qns=4;
5822         int w;
5823
5824         w= ABS(weight[i]) + qns*one;
5825         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
5826
5827         weight[i] = w;
5828 //        w=weight[i] = (63*qns + (w/2)) / w;
5829          
5830         assert(w>0);
5831         assert(w<(1<<6));
5832         sum += w*w;
5833     }
5834     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
5835 #ifdef REFINE_STATS
5836 {START_TIMER
5837 #endif
5838     run=0;
5839     rle_index=0;
5840     for(i=start_i; i<=last_non_zero; i++){
5841         int j= perm_scantable[i];
5842         const int level= block[j];
5843         int coeff;
5844         
5845         if(level){
5846             if(level<0) coeff= qmul*level - qadd;
5847             else        coeff= qmul*level + qadd;
5848             run_tab[rle_index++]=run;
5849             run=0;
5850
5851             s->dsp.add_8x8basis(rem, basis[j], coeff);
5852         }else{
5853             run++;
5854         }
5855     }
5856 #ifdef REFINE_STATS
5857 if(last_non_zero>0){
5858 STOP_TIMER("init rem[]")
5859 }
5860 }
5861
5862 {START_TIMER
5863 #endif
5864     for(;;){
5865         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
5866         int best_coeff=0;
5867         int best_change=0;
5868         int run2, best_unquant_change=0, analyze_gradient;
5869 #ifdef REFINE_STATS
5870 {START_TIMER
5871 #endif
5872         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
5873
5874         if(analyze_gradient){
5875 #ifdef REFINE_STATS
5876 {START_TIMER
5877 #endif
5878             for(i=0; i<64; i++){
5879                 int w= weight[i];
5880             
5881                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
5882             }
5883 #ifdef REFINE_STATS
5884 STOP_TIMER("rem*w*w")}
5885 {START_TIMER
5886 #endif
5887             s->dsp.fdct(d1);
5888 #ifdef REFINE_STATS
5889 STOP_TIMER("dct")}
5890 #endif
5891         }
5892
5893         if(start_i){
5894             const int level= block[0];
5895             int change, old_coeff;
5896
5897             assert(s->mb_intra);
5898             
5899             old_coeff= q*level;
5900             
5901             for(change=-1; change<=1; change+=2){
5902                 int new_level= level + change;
5903                 int score, new_coeff;
5904                 
5905                 new_coeff= q*new_level;
5906                 if(new_coeff >= 2048 || new_coeff < 0)
5907                     continue;
5908
5909                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
5910                 if(score<best_score){
5911                     best_score= score;
5912                     best_coeff= 0;
5913                     best_change= change;
5914                     best_unquant_change= new_coeff - old_coeff;
5915                 }
5916             }
5917         }
5918         
5919         run=0;
5920         rle_index=0;
5921         run2= run_tab[rle_index++];
5922         prev_level=0;
5923         prev_run=0;
5924
5925         for(i=start_i; i<64; i++){
5926             int j= perm_scantable[i];
5927             const int level= block[j];
5928             int change, old_coeff;
5929
5930             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
5931                 break;
5932
5933             if(level){
5934                 if(level<0) old_coeff= qmul*level - qadd;
5935                 else        old_coeff= qmul*level + qadd;
5936                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
5937             }else{
5938                 old_coeff=0;
5939                 run2--;
5940                 assert(run2>=0 || i >= last_non_zero );
5941             }
5942             
5943             for(change=-1; change<=1; change+=2){
5944                 int new_level= level + change;
5945                 int score, new_coeff, unquant_change;
5946                 
5947                 score=0;
5948                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
5949                    continue;
5950
5951                 if(new_level){
5952                     if(new_level<0) new_coeff= qmul*new_level - qadd;
5953                     else            new_coeff= qmul*new_level + qadd;
5954                     if(new_coeff >= 2048 || new_coeff <= -2048)
5955                         continue;
5956                     //FIXME check for overflow
5957                     
5958                     if(level){
5959                         if(level < 63 && level > -63){
5960                             if(i < last_non_zero)
5961                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
5962                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
5963                             else
5964                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
5965                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
5966                         }
5967                     }else{
5968                         assert(ABS(new_level)==1);
5969                         
5970                         if(analyze_gradient){
5971                             int g= d1[ scantable[i] ];
5972                             if(g && (g^new_level) >= 0)
5973                                 continue;
5974                         }
5975
5976                         if(i < last_non_zero){
5977                             int next_i= i + run2 + 1;
5978                             int next_level= block[ perm_scantable[next_i] ] + 64;
5979                             
5980                             if(next_level&(~127))
5981                                 next_level= 0;
5982
5983                             if(next_i < last_non_zero)
5984                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
5985                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
5986                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
5987                             else
5988                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
5989                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
5990                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
5991                         }else{
5992                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
5993                             if(prev_level){
5994                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
5995                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
5996                             }
5997                         }
5998                     }
5999                 }else{
6000                     new_coeff=0;
6001                     assert(ABS(level)==1);
6002
6003                     if(i < last_non_zero){
6004                         int next_i= i + run2 + 1;
6005                         int next_level= block[ perm_scantable[next_i] ] + 64;
6006                             
6007                         if(next_level&(~127))
6008                             next_level= 0;
6009
6010                         if(next_i < last_non_zero)
6011                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6012                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6013                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6014                         else
6015                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6016                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6017                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6018                     }else{
6019                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6020                         if(prev_level){
6021                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6022                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6023                         }
6024                     }
6025                 }
6026                 
6027                 score *= lambda;
6028
6029                 unquant_change= new_coeff - old_coeff;
6030                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6031                 
6032                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6033                 if(score<best_score){
6034                     best_score= score;
6035                     best_coeff= i;
6036                     best_change= change;
6037                     best_unquant_change= unquant_change;
6038                 }
6039             }
6040             if(level){
6041                 prev_level= level + 64;
6042                 if(prev_level&(~127))
6043                     prev_level= 0;
6044                 prev_run= run;
6045                 run=0;
6046             }else{
6047                 run++;
6048             }
6049         }
6050 #ifdef REFINE_STATS
6051 STOP_TIMER("iterative step")}
6052 #endif
6053
6054         if(best_change){
6055             int j= perm_scantable[ best_coeff ];
6056             
6057             block[j] += best_change;
6058             
6059             if(best_coeff > last_non_zero){
6060                 last_non_zero= best_coeff;
6061                 assert(block[j]);
6062 #ifdef REFINE_STATS
6063 after_last++;
6064 #endif
6065             }else{
6066 #ifdef REFINE_STATS
6067 if(block[j]){
6068     if(block[j] - best_change){
6069         if(ABS(block[j]) > ABS(block[j] - best_change)){
6070             raise++;
6071         }else{
6072             lower++;
6073         }
6074     }else{
6075         from_zero++;
6076     }
6077 }else{
6078     to_zero++;
6079 }
6080 #endif
6081                 for(; last_non_zero>=start_i; last_non_zero--){
6082                     if(block[perm_scantable[last_non_zero]])
6083                         break;
6084                 }
6085             }
6086 #ifdef REFINE_STATS
6087 count++;
6088 if(256*256*256*64 % count == 0){
6089     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6090 }
6091 #endif
6092             run=0;
6093             rle_index=0;
6094             for(i=start_i; i<=last_non_zero; i++){
6095                 int j= perm_scantable[i];
6096                 const int level= block[j];
6097         
6098                  if(level){
6099                      run_tab[rle_index++]=run;
6100                      run=0;
6101                  }else{
6102                      run++;
6103                  }
6104             }
6105             
6106             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6107         }else{
6108             break;
6109         }
6110     }
6111 #ifdef REFINE_STATS
6112 if(last_non_zero>0){
6113 STOP_TIMER("iterative search")
6114 }
6115 }
6116 #endif
6117
6118     return last_non_zero;
6119 }
6120
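/**
 * Plain (non-trellis) quantization: forward DCT, optional denoising, then
 * for each coefficient in scan order
 *   level = (|block[j] * qmat[j]| + bias) >> QMAT_SHIFT
 * with the sign restored; coefficients inside the dead zone are set to 0.
 * The intra DC coefficient is divided by the DC scale separately.
 * Returns the index of the last non-zero coefficient and sets *overflow
 * when a level may exceed max_qcoeff.
 */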
6121 static int dct_quantize_c(MpegEncContext *s, 
6122                         DCTELEM *block, int n,
6123                         int qscale, int *overflow)
6124 {
6125     int i, j, level, last_non_zero, q, start_i;
6126     const int *qmat;
6127     const uint8_t *scantable= s->intra_scantable.scantable;
6128     int bias;
6129     int max=0;
6130     unsigned int threshold1, threshold2;
6131
6132     s->dsp.fdct (block);
6133
6134     if(s->dct_error_sum)
6135         s->denoise_dct(s, block);
6136
6137     if (s->mb_intra) {
6138         if (!s->h263_aic) {
6139             if (n < 4)
6140                 q = s->y_dc_scale;
6141             else
6142                 q = s->c_dc_scale;
6143             q = q << 3;
6144         } else
6145             /* For AIC we skip quant/dequant of INTRADC */
6146             q = 1 << 3;
6147             
6148         /* note: block[0] is assumed to be positive */
6149         block[0] = (block[0] + (q >> 1)) / q;
6150         start_i = 1;
6151         last_non_zero = 0;
6152         qmat = s->q_intra_matrix[qscale];
6153         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6154     } else {
6155         start_i = 0;
6156         last_non_zero = -1;
6157         qmat = s->q_inter_matrix[qscale];
6158         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6159     }
6160     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6161     threshold2= (threshold1<<1);
6162     for(i=63;i>=start_i;i--) {
6163         j = scantable[i];
6164         level = block[j] * qmat[j];
6165
6166         if(((unsigned)(level+threshold1))>threshold2){
6167             last_non_zero = i;
6168             break;
6169         }else{
6170             block[j]=0;
6171         }
6172     }
6173     for(i=start_i; i<=last_non_zero; i++) {
6174         j = scantable[i];
6175         level = block[j] * qmat[j];
6176
6177 //        if(   bias+level >= (1<<QMAT_SHIFT)
6178 //           || bias-level >= (1<<QMAT_SHIFT)){
6179         if(((unsigned)(level+threshold1))>threshold2){
6180             if(level>0){
6181                 level= (bias + level)>>QMAT_SHIFT;
6182                 block[j]= level;
6183             }else{
6184                 level= (bias - level)>>QMAT_SHIFT;
6185                 block[j]= -level;
6186             }
6187             max |=level;
6188         }else{
6189             block[j]=0;
6190         }
6191     }
6192     *overflow= s->max_qcoeff < max; //overflow might have happened
6193     
6194     /* we need this permutation so that the IDCT gets its coefficients in the right order; we only permute the nonzero elements */
6195     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6196         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6197
6198     return last_non_zero;
6199 }
6200
6201 #endif //CONFIG_ENCODERS
6202
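/**
 * MPEG-1 intra dequantization: the DC coefficient is scaled by the luma or
 * chroma DC scale, the AC coefficients by
 *   level = (level * qscale * quant_matrix[j]) >> 3
 * and forced to an odd value ((level - 1) | 1) for IDCT mismatch control.
 */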
6203 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
6204                                    DCTELEM *block, int n, int qscale)
6205 {
6206     int i, level, nCoeffs;
6207     const uint16_t *quant_matrix;
6208
6209     nCoeffs= s->block_last_index[n];
6210     
6211     if (n < 4) 
6212         block[0] = block[0] * s->y_dc_scale;
6213     else
6214         block[0] = block[0] * s->c_dc_scale;
6215     /* XXX: only mpeg1 */
6216     quant_matrix = s->intra_matrix;
6217     for(i=1;i<=nCoeffs;i++) {
6218         int j= s->intra_scantable.permutated[i];
6219         level = block[j];
6220         if (level) {
6221             if (level < 0) {
6222                 level = -level;
6223                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6224                 level = (level - 1) | 1;
6225                 level = -level;
6226             } else {
6227                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6228                 level = (level - 1) | 1;
6229             }
6230             block[j] = level;
6231         }
6232     }
6233 }
6234
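/**
 * MPEG-1 inter dequantization:
 *   level = ((2 * level + 1) * qscale * quant_matrix[j]) >> 4
 * again forced to an odd value for IDCT mismatch control.
 */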
6235 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
6236                                    DCTELEM *block, int n, int qscale)
6237 {
6238     int i, level, nCoeffs;
6239     const uint16_t *quant_matrix;
6240
6241     nCoeffs= s->block_last_index[n];
6242     
6243     quant_matrix = s->inter_matrix;
6244     for(i=0; i<=nCoeffs; i++) {
6245         int j= s->intra_scantable.permutated[i];
6246         level = block[j];
6247         if (level) {
6248             if (level < 0) {
6249                 level = -level;
6250                 level = (((level << 1) + 1) * qscale *
6251                          ((int) (quant_matrix[j]))) >> 4;
6252                 level = (level - 1) | 1;
6253                 level = -level;
6254             } else {
6255                 level = (((level << 1) + 1) * qscale *
6256                          ((int) (quant_matrix[j]))) >> 4;
6257                 level = (level - 1) | 1;
6258             }
6259             block[j] = level;
6260         }
6261     }
6262 }
6263
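/**
 * MPEG-2 intra dequantization: same scaling as the MPEG-1 intra version but
 * without the oddification step; with alternate_scan all 64 coefficients are
 * processed regardless of block_last_index.
 */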
6264 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, 
6265                                    DCTELEM *block, int n, int qscale)
6266 {
6267     int i, level, nCoeffs;
6268     const uint16_t *quant_matrix;
6269
6270     if(s->alternate_scan) nCoeffs= 63;
6271     else nCoeffs= s->block_last_index[n];
6272     
6273     if (n < 4) 
6274         block[0] = block[0] * s->y_dc_scale;
6275     else
6276         block[0] = block[0] * s->c_dc_scale;
6277     quant_matrix = s->intra_matrix;
6278     for(i=1;i<=nCoeffs;i++) {
6279         int j= s->intra_scantable.permutated[i];
6280         level = block[j];
6281         if (level) {
6282             if (level < 0) {
6283                 level = -level;
6284                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6285                 level = -level;
6286             } else {
6287                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6288             }
6289             block[j] = level;
6290         }
6291     }
6292 }
6293
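/**
 * MPEG-2 inter dequantization plus MPEG-2 mismatch control: the LSB of
 * block[63] is toggled when the sum of the dequantized coefficients is even.
 */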
6294 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, 
6295                                    DCTELEM *block, int n, int qscale)
6296 {
6297     int i, level, nCoeffs;
6298     const uint16_t *quant_matrix;
6299     int sum=-1;
6300
6301     if(s->alternate_scan) nCoeffs= 63;
6302     else nCoeffs= s->block_last_index[n];
6303     
6304     quant_matrix = s->inter_matrix;
6305     for(i=0; i<=nCoeffs; i++) {
6306         int j= s->intra_scantable.permutated[i];
6307         level = block[j];
6308         if (level) {
6309             if (level < 0) {
6310                 level = -level;
6311                 level = (((level << 1) + 1) * qscale *
6312                          ((int) (quant_matrix[j]))) >> 4;
6313                 level = -level;
6314             } else {
6315                 level = (((level << 1) + 1) * qscale *
6316                          ((int) (quant_matrix[j]))) >> 4;
6317             }
6318             block[j] = level;
6319             sum+=level;
6320         }
6321     }
6322     block[63]^=sum&1;
6323 }
6324
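/**
 * H.263-style intra dequantization:
 *   level = level * 2*qscale +- ((qscale - 1) | 1)
 * DC scaling and the +-qadd term are skipped with advanced intra coding
 * (h263_aic); with AC prediction all 64 coefficients are processed.
 */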
6325 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
6326                                   DCTELEM *block, int n, int qscale)
6327 {
6328     int i, level, qmul, qadd;
6329     int nCoeffs;
6330     
6331     assert(s->block_last_index[n]>=0);
6332     
6333     qmul = qscale << 1;
6334     
6335     if (!s->h263_aic) {
6336         if (n < 4) 
6337             block[0] = block[0] * s->y_dc_scale;
6338         else
6339             block[0] = block[0] * s->c_dc_scale;
6340         qadd = (qscale - 1) | 1;
6341     }else{
6342         qadd = 0;
6343     }
6344     if(s->ac_pred)
6345         nCoeffs=63;
6346     else
6347         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6348
6349     for(i=1; i<=nCoeffs; i++) {
6350         level = block[i];
6351         if (level) {
6352             if (level < 0) {
6353                 level = level * qmul - qadd;
6354             } else {
6355                 level = level * qmul + qadd;
6356             }
6357             block[i] = level;
6358         }
6359     }
6360 }
6361
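/**
 * H.263-style inter dequantization; same formula as the intra version but
 * without any DC special case, starting at coefficient 0.
 */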
6362 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
6363                                   DCTELEM *block, int n, int qscale)
6364 {
6365     int i, level, qmul, qadd;
6366     int nCoeffs;
6367     
6368     assert(s->block_last_index[n]>=0);
6369     
6370     qadd = (qscale - 1) | 1;
6371     qmul = qscale << 1;
6372     
6373     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6374
6375     for(i=0; i<=nCoeffs; i++) {
6376         level = block[i];
6377         if (level) {
6378             if (level < 0) {
6379                 level = level * qmul - qadd;
6380             } else {
6381                 level = level * qmul + qadd;
6382             }
6383             block[i] = level;
6384         }
6385     }
6386 }
6387
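/* Option table referenced by several of the encoder declarations below; each
   entry appears to map (name, description, AVCodecContext field, min, max,
   default). */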
6388 static const AVOption mpeg4_options[] =
6389 {
6390     AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
6391     AVOPTION_CODEC_INT("ratetol", "number of bits the bitstream is allowed to diverge from the reference"
6392                        "the reference can be CBR (for CBR pass1) or VBR (for pass2)",
6393                        bit_rate_tolerance, 4, 240000000, 8000),
6394     AVOPTION_CODEC_INT("qmin", "minimum quantizer", qmin, 1, 31, 2),
6395     AVOPTION_CODEC_INT("qmax", "maximum quantizer", qmax, 1, 31, 31),
6396     AVOPTION_CODEC_STRING("rc_eq", "rate control equation",
6397                           rc_eq, "tex^qComp,option1,options2", 0),
6398     AVOPTION_CODEC_INT("rc_minrate", "rate control minimum bitrate",
6399                        rc_min_rate, 4, 24000000, 0),
6400     AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
6401                        rc_max_rate, 4, 24000000, 0),
6402     AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggresivity",
6403                           rc_buffer_aggressivity, 4, 24000000, 0),
6404     AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
6405                           rc_initial_cplx, 0., 9999999., 0),
6406     AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
6407                           i_quant_factor, 0., 0., 0),
6408     AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
6409                           i_quant_offset, -999999., 999999., 0),
6410     AVOPTION_CODEC_INT("dct_algo", "dct alghorithm",
6411                        dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
6412     AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
6413                           lumi_masking, 0., 999999., 0),
6414     AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporary complexity masking",
6415                           temporal_cplx_masking, 0., 999999., 0),
6416     AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
6417                           spatial_cplx_masking, 0., 999999., 0),
6418     AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
6419                           p_masking, 0., 999999., 0),
6420     AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
6421                           dark_masking, 0., 999999., 0),
6422     AVOPTION_CODEC_INT("idct_algo", "idct alghorithm",
6423                        idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
6424
6425     AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
6426                        mb_qmin, 0, 8, 0),
6427     AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
6428                        mb_qmax, 0, 8, 0),
6429
6430     AVOPTION_CODEC_INT("me_cmp", "ME compare function",
6431                        me_cmp, 0, 24000000, 0),
6432     AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
6433                        me_sub_cmp, 0, 24000000, 0),
6434
6435
6436     AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
6437                        dia_size, 0, 24000000, 0),
6438     AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors",
6439                        last_predictor_count, 0, 24000000, 0),
6440
6441     AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
6442                        pre_me, 0, 24000000, 0),
6443     AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
6444                        me_pre_cmp, 0, 24000000, 0),
6445
6446     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
6447                        me_range, 0, 24000000, 0),
6448     AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamod size & shape",
6449                        pre_dia_size, 0, 24000000, 0),
6450     AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
6451                        me_subpel_quality, 0, 24000000, 0),
6452     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
6453                        me_range, 0, 24000000, 0),
6454     AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
6455                         flags, CODEC_FLAG_PSNR, 0),
6456     AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
6457                               rc_override),
6458     AVOPTION_SUB(avoptions_common),
6459     AVOPTION_END()
6460 };
6461
6462 #ifdef CONFIG_ENCODERS
6463 #ifdef CONFIG_RISKY
6464 AVCodec h263_encoder = {
6465     "h263",
6466     CODEC_TYPE_VIDEO,
6467     CODEC_ID_H263,
6468     sizeof(MpegEncContext),
6469     MPV_encode_init,
6470     MPV_encode_picture,
6471     MPV_encode_end,
6472 };
6473
6474 AVCodec h263p_encoder = {
6475     "h263p",
6476     CODEC_TYPE_VIDEO,
6477     CODEC_ID_H263P,
6478     sizeof(MpegEncContext),
6479     MPV_encode_init,
6480     MPV_encode_picture,
6481     MPV_encode_end,
6482 };
6483
6484 AVCodec flv_encoder = {
6485     "flv",
6486     CODEC_TYPE_VIDEO,
6487     CODEC_ID_FLV1,
6488     sizeof(MpegEncContext),
6489     MPV_encode_init,
6490     MPV_encode_picture,
6491     MPV_encode_end,
6492 };
6493
6494 AVCodec rv10_encoder = {
6495     "rv10",
6496     CODEC_TYPE_VIDEO,
6497     CODEC_ID_RV10,
6498     sizeof(MpegEncContext),
6499     MPV_encode_init,
6500     MPV_encode_picture,
6501     MPV_encode_end,
6502 };
6503
6504 AVCodec rv20_encoder = {
6505     "rv20",
6506     CODEC_TYPE_VIDEO,
6507     CODEC_ID_RV20,
6508     sizeof(MpegEncContext),
6509     MPV_encode_init,
6510     MPV_encode_picture,
6511     MPV_encode_end,
6512 };
6513
6514 AVCodec mpeg4_encoder = {
6515     "mpeg4",
6516     CODEC_TYPE_VIDEO,
6517     CODEC_ID_MPEG4,
6518     sizeof(MpegEncContext),
6519     MPV_encode_init,
6520     MPV_encode_picture,
6521     MPV_encode_end,
6522     .options = mpeg4_options,
6523     .capabilities= CODEC_CAP_DELAY,
6524 };
6525
6526 AVCodec msmpeg4v1_encoder = {
6527     "msmpeg4v1",
6528     CODEC_TYPE_VIDEO,
6529     CODEC_ID_MSMPEG4V1,
6530     sizeof(MpegEncContext),
6531     MPV_encode_init,
6532     MPV_encode_picture,
6533     MPV_encode_end,
6534     .options = mpeg4_options,
6535 };
6536
6537 AVCodec msmpeg4v2_encoder = {
6538     "msmpeg4v2",
6539     CODEC_TYPE_VIDEO,
6540     CODEC_ID_MSMPEG4V2,
6541     sizeof(MpegEncContext),
6542     MPV_encode_init,
6543     MPV_encode_picture,
6544     MPV_encode_end,
6545     .options = mpeg4_options,
6546 };
6547
6548 AVCodec msmpeg4v3_encoder = {
6549     "msmpeg4",
6550     CODEC_TYPE_VIDEO,
6551     CODEC_ID_MSMPEG4V3,
6552     sizeof(MpegEncContext),
6553     MPV_encode_init,
6554     MPV_encode_picture,
6555     MPV_encode_end,
6556     .options = mpeg4_options,
6557 };
6558
6559 AVCodec wmv1_encoder = {
6560     "wmv1",
6561     CODEC_TYPE_VIDEO,
6562     CODEC_ID_WMV1,
6563     sizeof(MpegEncContext),
6564     MPV_encode_init,
6565     MPV_encode_picture,
6566     MPV_encode_end,
6567     .options = mpeg4_options,
6568 };
6569
6570 #endif
6571
6572 AVCodec mjpeg_encoder = {
6573     "mjpeg",
6574     CODEC_TYPE_VIDEO,
6575     CODEC_ID_MJPEG,
6576     sizeof(MpegEncContext),
6577     MPV_encode_init,
6578     MPV_encode_picture,
6579     MPV_encode_end,
6580 };
6581
6582 #endif //CONFIG_ENCODERS