git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard.
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19  *
  20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
  21  */
  22
  23 /**
  24  * @file mpegvideo.c
  25  * The simplest mpeg encoder (well, it was the simplest!).
  26  */
  27
  28 #include "avcodec.h"
  29 #include "dsputil.h"
  30 #include "mpegvideo.h"
  31 #include "faandct.h"
  32 #include <limits.h>
  33
  34 #ifdef USE_FASTMEMCPY
  35 #include "fastmemcpy.h"
  36 #endif
  37
  38 //#undef NDEBUG
  39 //#include <assert.h>
  40
  41 #ifdef CONFIG_ENCODERS
  42 static void encode_picture(MpegEncContext *s, int picture_number);
  43 #endif //CONFIG_ENCODERS
  44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
  45                                    DCTELEM *block, int n, int qscale);
  46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
  47                                    DCTELEM *block, int n, int qscale);
  48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
  49                                    DCTELEM *block, int n, int qscale);
  50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
  51                                    DCTELEM *block, int n, int qscale);
  52 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
  53                                   DCTELEM *block, int n, int qscale);
  54 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
  55                                   DCTELEM *block, int n, int qscale);
  56 static void dct_unquantize_h261_intra_c(MpegEncContext *s,
  57                                   DCTELEM *block, int n, int qscale);
  58 static void dct_unquantize_h261_inter_c(MpegEncContext *s,
  59                                   DCTELEM *block, int n, int qscale);
  60 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
  61 #ifdef CONFIG_ENCODERS
  62 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
  63 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
  64 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
  65 static int sse_mb(MpegEncContext *s);
  66 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
  67 #endif //CONFIG_ENCODERS
  68
  69 #ifdef HAVE_XVMC
  70 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
  71 extern void XVMC_field_end(MpegEncContext *s);
  72 extern void XVMC_decode_mb(MpegEncContext *s);
  73 #endif
  74
  75 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
  76
  77
  78 /* enable all paranoid tests for rounding, overflows, etc... */
  79 //#define PARANOID
  80
  81 //#define DEBUG
  82
  83
  84 /* for jpeg fast DCT */
  85 #define CONST_BITS 14
  86
  87 static const uint16_t aanscales[64] = {
  88     /* precomputed values scaled up by 14 bits */
  89     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
  90     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
  91     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
  92     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
  93     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
  94     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
  95     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
  96     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
  97 };
  98
  99 static const uint8_t h263_chroma_roundtab[16] = {
 100 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
 101     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
 102 };
 103
 104 static const uint8_t ff_default_chroma_qscale_table[32]={
 105 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
 106     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
 107 };
 108
 109 #ifdef CONFIG_ENCODERS
 110 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
 111 static uint8_t default_fcode_tab[MAX_MV*2+1];
 112
 113 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
 114
 115 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
 116                            const uint16_t *quant_matrix, int bias, int qmin, int qmax)
 117 {
 118     int qscale;
 119
 120     for(qscale=qmin; qscale<=qmax; qscale++){
 121         int i;
 122         if (dsp->fdct == ff_jpeg_fdct_islow
 123 #ifdef FAAN_POSTSCALE
 124             || dsp->fdct == ff_faandct
 125 #endif
 126             ) {
 127             for(i=0;i<64;i++) {
 128                 const int j= dsp->idct_permutation[i];
 129                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
 130                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
 131                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
 132                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
 133
 134                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
 135                                 (qscale * quant_matrix[j]));
 136             }
 137         } else if (dsp->fdct == fdct_ifast
 138 #ifndef FAAN_POSTSCALE
 139                    || dsp->fdct == ff_faandct
 140 #endif
 141                    ) {
 142             for(i=0;i<64;i++) {
 143                 const int j= dsp->idct_permutation[i];
 144                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
 145                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
 146                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
 147                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
 148
 149                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
 150                                 (aanscales[i] * qscale * quant_matrix[j]));
 151             }
 152         } else {
 153             for(i=0;i<64;i++) {
 154                 const int j= dsp->idct_permutation[i];
 155                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 156                    So 16           <= qscale * quant_matrix[i]             <= 7905
 157                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
 158                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
 159                 */
 160                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
 161 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
 162                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
 163
 164                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
 165                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
 166             }
 167         }
 168     }
 169 }
 170
 171 static inline void update_qscale(MpegEncContext *s){
 172     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
 173     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 174
 175     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
 176 }
 177 #endif //CONFIG_ENCODERS
 178
 179 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
 180     int i;
 181     int end;
 182
 183     st->scantable= src_scantable;
 184
 185     for(i=0; i<64; i++){
 186         int j;
 187         j = src_scantable[i];
 188         st->permutated[i] = permutation[j];
 189 #ifdef ARCH_POWERPC
 190         st->inverse[j] = i;
 191 #endif
 192     }
 193
 194     end=-1;
 195     for(i=0; i<64; i++){
 196         int j;
 197         j = st->permutated[i];
 198         if(j>end) end=j;
 199         st->raster_end[i]= end;
 200     }
 201 }
 202
 203 #ifdef CONFIG_ENCODERS
 204 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
 205     int i;
 206
 207     if(matrix){
 208         put_bits(pb, 1, 1);
 209         for(i=0;i<64;i++) {
 210             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
 211         }
 212     }else
 213         put_bits(pb, 1, 0);
 214 }
 215 #endif //CONFIG_ENCODERS
 216
 217 /* init common dct for both encoder and decoder */
 218 int DCT_common_init(MpegEncContext *s)
 219 {
 220     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
 221     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
 222     s->dct_unquantize_h261_intra = dct_unquantize_h261_intra_c;
 223     s->dct_unquantize_h261_inter = dct_unquantize_h261_inter_c;
 224     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
 225     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
 226     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
 227     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
 228
 229 #ifdef CONFIG_ENCODERS
 230     s->dct_quantize= dct_quantize_c;
 231     s->denoise_dct= denoise_dct_c;
 232 #endif
 233
 234 #ifdef HAVE_MMX
 235     MPV_common_init_mmx(s);
 236 #endif
 237 #ifdef ARCH_ALPHA
 238     MPV_common_init_axp(s);
 239 #endif
 240 #ifdef HAVE_MLIB
 241     MPV_common_init_mlib(s);
 242 #endif
 243 #ifdef HAVE_MMI
 244     MPV_common_init_mmi(s);
 245 #endif
 246 #ifdef ARCH_ARMV4L
 247     MPV_common_init_armv4l(s);
 248 #endif
 249 #ifdef ARCH_POWERPC
 250     MPV_common_init_ppc(s);
 251 #endif
 252
 253 #ifdef CONFIG_ENCODERS
 254     s->fast_dct_quantize= s->dct_quantize;
 255
 256     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
 257         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
 258     }
 259
 260 #endif //CONFIG_ENCODERS
 261
 262     /* load & permutate scantables
 263        note: only wmv uses differnt ones
 264     */
 265     if(s->alternate_scan){
 266         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
 267         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
 268     }else{
 269         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
 270         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
 271     }
 272     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
 273     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
 274
 275     return 0;
 276 }
 277
 278 static void copy_picture(Picture *dst, Picture *src){
 279     *dst = *src;
 280     dst->type= FF_BUFFER_TYPE_COPY;
 281 }
 282
 283 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
 284     int i;
 285
 286     dst->pict_type              = src->pict_type;
 287     dst->quality                = src->quality;
 288     dst->coded_picture_number   = src->coded_picture_number;
 289     dst->display_picture_number = src->display_picture_number;
 290 //    dst->reference              = src->reference;
 291     dst->pts                    = src->pts;
 292     dst->interlaced_frame       = src->interlaced_frame;
 293     dst->top_field_first        = src->top_field_first;
 294
 295     if(s->avctx->me_threshold){
 296         if(!src->motion_val[0])
 297             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
 298         if(!src->mb_type)
 299             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
 300         if(!src->ref_index[0])
 301             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
 302         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
 303             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesnt match! (%d!=%d)\n",
 304             src->motion_subsample_log2, dst->motion_subsample_log2);
 305
 306         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
 307
 308         for(i=0; i<2; i++){
 309             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
 310             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
 311
 312             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
 313                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
 314             }
 315             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
 316                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
 317             }
 318         }
 319     }
 320 }
 321
 322 /**
 323  * allocates a Picture
 324  * The pixels are allocated/set by calling get_buffer() if shared=0
 325  */
 326 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
 327     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
 328     const int mb_array_size= s->mb_stride*s->mb_height;
 329     const int b8_array_size= s->b8_stride*s->mb_height*2;
 330     const int b4_array_size= s->b4_stride*s->mb_height*4;
 331     int i;
 332
 333     if(shared){
 334         assert(pic->data[0]);
 335         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
 336         pic->type= FF_BUFFER_TYPE_SHARED;
 337     }else{
 338         int r;
 339
 340         assert(!pic->data[0]);
 341
 342         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
 343
 344         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
 345             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
 346             return -1;
 347         }
 348
 349         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
 350             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
 351             return -1;
 352         }
 353
 354         if(pic->linesize[1] != pic->linesize[2]){
 355             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride missmatch)\n");
 356             return -1;
 357         }
 358
 359         s->linesize  = pic->linesize[0];
 360         s->uvlinesize= pic->linesize[1];
 361     }
 362
 363     if(pic->qscale_table==NULL){
 364         if (s->encoding) {
 365             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
 366             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
 367             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
 368         }
 369
 370         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
 371         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
 372         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
 373         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
 374         if(s->out_format == FMT_H264){
 375             for(i=0; i<2; i++){
 376                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+2)  * sizeof(int16_t))
 377                 pic->motion_val[i]= pic->motion_val_base[i]+2;
 378                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
 379             }
 380             pic->motion_subsample_log2= 2;
 381         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
 382             for(i=0; i<2; i++){
 383                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+2) * sizeof(int16_t))
 384                 pic->motion_val[i]= pic->motion_val_base[i]+2;
 385                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
 386             }
 387             pic->motion_subsample_log2= 3;
 388         }
 389         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
 390             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
 391         }
 392         pic->qstride= s->mb_stride;
 393         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
 394     }
 395
 396     //it might be nicer if the application would keep track of these but it would require a API change
 397     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
 398     s->prev_pict_types[0]= s->pict_type;
 399     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
 400         pic->age= INT_MAX; // skiped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
 401
 402     return 0;
 403 fail: //for the CHECKED_ALLOCZ macro
 404     return -1;
 405 }
 406
 407 /**
 408  * deallocates a picture
 409  */
 410 static void free_picture(MpegEncContext *s, Picture *pic){
 411     int i;
 412
 413     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
 414         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
 415     }
 416
 417     av_freep(&pic->mb_var);
 418     av_freep(&pic->mc_mb_var);
 419     av_freep(&pic->mb_mean);
 420     av_freep(&pic->mbskip_table);
 421     av_freep(&pic->qscale_table);
 422     av_freep(&pic->mb_type_base);
 423     av_freep(&pic->dct_coeff);
 424     av_freep(&pic->pan_scan);
 425     pic->mb_type= NULL;
 426     for(i=0; i<2; i++){
 427         av_freep(&pic->motion_val_base[i]);
 428         av_freep(&pic->ref_index[i]);
 429     }
 430
 431     if(pic->type == FF_BUFFER_TYPE_SHARED){
 432         for(i=0; i<4; i++){
 433             pic->base[i]=
 434             pic->data[i]= NULL;
 435         }
 436         pic->type= 0;
 437     }
 438 }
 439
 440 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
 441     int i;
 442
 443     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
 444     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
 445     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
 446
 447      //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
 448     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
 449     s->rd_scratchpad=   s->me.scratchpad;
 450     s->b_scratchpad=    s->me.scratchpad;
 451     s->obmc_scratchpad= s->me.scratchpad + 16;
 452     if (s->encoding) {
 453         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
 454         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
 455         if(s->avctx->noise_reduction){
 456             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
 457         }
 458     }
 459     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
 460     s->block= s->blocks[0];
 461
 462     for(i=0;i<12;i++){
 463         s->pblocks[i] = (short *)(&s->block[i]);
 464     }
 465     return 0;
 466 fail:
 467     return -1; //free() through MPV_common_end()
 468 }
 469
 470 static void free_duplicate_context(MpegEncContext *s){
 471     if(s==NULL) return;
 472
 473     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
 474     av_freep(&s->me.scratchpad);
 475     s->rd_scratchpad=
 476     s->b_scratchpad=
 477     s->obmc_scratchpad= NULL;
 478
 479     av_freep(&s->dct_error_sum);
 480     av_freep(&s->me.map);
 481     av_freep(&s->me.score_map);
 482     av_freep(&s->blocks);
 483     s->block= NULL;
 484 }
 485
 486 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
 487 #define COPY(a) bak->a= src->a
 488     COPY(allocated_edge_emu_buffer);
 489     COPY(edge_emu_buffer);
 490     COPY(me.scratchpad);
 491     COPY(rd_scratchpad);
 492     COPY(b_scratchpad);
 493     COPY(obmc_scratchpad);
 494     COPY(me.map);
 495     COPY(me.score_map);
 496     COPY(blocks);
 497     COPY(block);
 498     COPY(start_mb_y);
 499     COPY(end_mb_y);
 500     COPY(me.map_generation);
 501     COPY(pb);
 502     COPY(dct_error_sum);
 503     COPY(dct_count[0]);
 504     COPY(dct_count[1]);
 505 #undef COPY
 506 }
 507
 508 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
 509     MpegEncContext bak;
 510     int i;
 511     //FIXME copy only needed parts
 512 //START_TIMER
 513     backup_duplicate_context(&bak, dst);
 514     memcpy(dst, src, sizeof(MpegEncContext));
 515     backup_duplicate_context(dst, &bak);
 516     for(i=0;i<12;i++){
 517         dst->pblocks[i] = (short *)(&dst->block[i]);
 518     }
 519 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
 520 }
 521
 522 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
 523 #define COPY(a) dst->a= src->a
 524     COPY(pict_type);
 525     COPY(current_picture);
 526     COPY(f_code);
 527     COPY(b_code);
 528     COPY(qscale);
 529     COPY(lambda);
 530     COPY(lambda2);
 531     COPY(picture_in_gop_number);
 532     COPY(gop_picture_number);
 533     COPY(frame_pred_frame_dct); //FIXME dont set in encode_header
 534     COPY(progressive_frame); //FIXME dont set in encode_header
 535     COPY(partitioned_frame); //FIXME dont set in encode_header
 536 #undef COPY
 537 }
 538
 539 /**
 540  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
 541  * the changed fields will not depend upon the prior state of the MpegEncContext.
 542  */
 543 static void MPV_common_defaults(MpegEncContext *s){
 544     s->y_dc_scale_table=
 545     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
 546     s->chroma_qscale_table= ff_default_chroma_qscale_table;
 547     s->progressive_frame= 1;
 548     s->progressive_sequence= 1;
 549     s->picture_structure= PICT_FRAME;
 550
 551     s->coded_picture_number = 0;
 552     s->picture_number = 0;
 553     s->input_picture_number = 0;
 554
 555     s->picture_in_gop_number = 0;
 556
 557     s->f_code = 1;
 558     s->b_code = 1;
 559 }
 560
 561 /**
 562  * sets the given MpegEncContext to defaults for decoding.
 563  * the changed fields will not depend upon the prior state of the MpegEncContext.
 564  */
 565 void MPV_decode_defaults(MpegEncContext *s){
 566     MPV_common_defaults(s);
 567 }
 568
 569 /**
 570  * sets the given MpegEncContext to defaults for encoding.
 571  * the changed fields will not depend upon the prior state of the MpegEncContext.
 572  */
 573
 574 #ifdef CONFIG_ENCODERS
 575 static void MPV_encode_defaults(MpegEncContext *s){
 576     static int done=0;
 577
 578     MPV_common_defaults(s);
 579
 580     if(!done){
 581         int i;
 582         done=1;
 583
 584         default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
 585         memset(default_mv_penalty, 0, sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
 586         memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
 587
 588         for(i=-16; i<16; i++){
 589             default_fcode_tab[i + MAX_MV]= 1;
 590         }
 591     }
 592     s->me.mv_penalty= default_mv_penalty;
 593     s->fcode_tab= default_fcode_tab;
 594 }
 595 #endif //CONFIG_ENCODERS
 596
 597 /**
 598  * init common structure for both encoder and decoder.
 599  * this assumes that some variables like width/height are already set
 600  */
 601 int MPV_common_init(MpegEncContext *s)
 602 {
 603     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
 604
 605     if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
 606         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
 607         return -1;
 608     }
 609
 610     dsputil_init(&s->dsp, s->avctx);
 611     DCT_common_init(s);
 612
 613     s->flags= s->avctx->flags;
 614     s->flags2= s->avctx->flags2;
 615
 616     s->mb_width  = (s->width  + 15) / 16;
 617     s->mb_height = (s->height + 15) / 16;
 618     s->mb_stride = s->mb_width + 1;
 619     s->b8_stride = s->mb_width*2 + 1;
 620     s->b4_stride = s->mb_width*4 + 1;
 621     mb_array_size= s->mb_height * s->mb_stride;
 622     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
 623
 624     /* set chroma shifts */
 625     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
 626                                                     &(s->chroma_y_shift) );
 627
 628     /* set default edge pos, will be overriden in decode_header if needed */
 629     s->h_edge_pos= s->mb_width*16;
 630     s->v_edge_pos= s->mb_height*16;
 631
 632     s->mb_num = s->mb_width * s->mb_height;
 633
 634     s->block_wrap[0]=
 635     s->block_wrap[1]=
 636     s->block_wrap[2]=
 637     s->block_wrap[3]= s->b8_stride;
 638     s->block_wrap[4]=
 639     s->block_wrap[5]= s->mb_stride;
 640
 641     y_size = s->b8_stride * (2 * s->mb_height + 1);
 642     c_size = s->mb_stride * (s->mb_height + 1);
 643     yc_size = y_size + 2 * c_size;
 644
 645     /* convert fourcc to upper case */
 646     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
 647                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
 648                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
 649                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
 650
 651     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
 652                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
 653                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
 654                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
 655
 656     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
 657
 658     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
 659     for(y=0; y<s->mb_height; y++){
 660         for(x=0; x<s->mb_width; x++){
 661             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
 662         }
 663     }
 664     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
 665
 666     if (s->encoding) {
 667         /* Allocate MV tables */
 668         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
 669         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
 670         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
 671         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
 672         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
 673         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
 674         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
 675         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
 676         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
 677         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
 678         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
 679         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
 680
 681         if(s->msmpeg4_version){
 682             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
 683         }
 684         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
 685
 686         /* Allocate MB type table */
 687         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
 688
 689         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
 690
 691         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
 692         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
 693         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
 694         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
 695         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
 696         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
 697
 698         if(s->avctx->noise_reduction){
 699             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
 700         }
 701     }
 702     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
 703
 704     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
 705
 706     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
 707         /* interlaced direct mode decoding tables */
 708             for(i=0; i<2; i++){
 709                 int j, k;
 710                 for(j=0; j<2; j++){
 711                     for(k=0; k<2; k++){
 712                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
 713                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
 714                     }
 715                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
 716                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
 717                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
 718                 }
 719                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
 720             }
 721     }
 722     if (s->out_format == FMT_H263) {
 723         /* ac values */
 724         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
 725         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
 726         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
 727         s->ac_val[2] = s->ac_val[1] + c_size;
 728
 729         /* cbp values */
 730         CHECKED_ALLOCZ(s->coded_block_base, y_size);
 731         s->coded_block= s->coded_block_base + s->b8_stride + 1;
 732
 733         /* divx501 bitstream reorder buffer */
 734         CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
 735
 736         /* cbp, ac_pred, pred_dir */
 737         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
 738         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
 739     }
 740
 741     if (s->h263_pred || s->h263_plus || !s->encoding) {
 742         /* dc values */
 743         //MN: we need these for error resilience of intra-frames
 744         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
 745         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
 746         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
 747         s->dc_val[2] = s->dc_val[1] + c_size;
 748         for(i=0;i<yc_size;i++)
 749             s->dc_val_base[i] = 1024;
 750     }
 751
 752     /* which mb is a intra block */
 753     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
 754     memset(s->mbintra_table, 1, mb_array_size);
 755
 756     /* init macroblock skip table */
 757     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
 758     //Note the +1 is for a quicker mpeg4 slice_end detection
 759     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
 760
 761     s->parse_context.state= -1;
 762     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
 763        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
 764        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
 765        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
 766     }
 767
 768     s->context_initialized = 1;
 769
 770     s->thread_context[0]= s;
 771     for(i=1; i<s->avctx->thread_count; i++){
 772         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
 773         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
 774     }
 775
 776     for(i=0; i<s->avctx->thread_count; i++){
 777         if(init_duplicate_context(s->thread_context[i], s) < 0)
 778            goto fail;
 779         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
 780         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
 781     }
 782
 783     return 0;
 784  fail:
 785     MPV_common_end(s);
 786     return -1;
 787 }
 788
 789 /* init common structure for both encoder and decoder */
 790 void MPV_common_end(MpegEncContext *s)
 791 {
 792     int i, j, k;
 793
 794     for(i=0; i<s->avctx->thread_count; i++){
 795         free_duplicate_context(s->thread_context[i]);
 796     }
 797     for(i=1; i<s->avctx->thread_count; i++){
 798         av_freep(&s->thread_context[i]);
 799     }
 800
 801     av_freep(&s->parse_context.buffer);
 802     s->parse_context.buffer_size=0;
 803
 804     av_freep(&s->mb_type);
 805     av_freep(&s->p_mv_table_base);
 806     av_freep(&s->b_forw_mv_table_base);
 807     av_freep(&s->b_back_mv_table_base);
 808     av_freep(&s->b_bidir_forw_mv_table_base);
 809     av_freep(&s->b_bidir_back_mv_table_base);
 810     av_freep(&s->b_direct_mv_table_base);
 811     s->p_mv_table= NULL;
 812     s->b_forw_mv_table= NULL;
 813     s->b_back_mv_table= NULL;
 814     s->b_bidir_forw_mv_table= NULL;
 815     s->b_bidir_back_mv_table= NULL;
 816     s->b_direct_mv_table= NULL;
 817     for(i=0; i<2; i++){
 818         for(j=0; j<2; j++){
 819             for(k=0; k<2; k++){
 820                 av_freep(&s->b_field_mv_table_base[i][j][k]);
 821                 s->b_field_mv_table[i][j][k]=NULL;
 822             }
 823             av_freep(&s->b_field_select_table[i][j]);
 824             av_freep(&s->p_field_mv_table_base[i][j]);
 825             s->p_field_mv_table[i][j]=NULL;
 826         }
 827         av_freep(&s->p_field_select_table[i]);
 828     }
 829
 830     av_freep(&s->dc_val_base);
 831     av_freep(&s->ac_val_base);
 832     av_freep(&s->coded_block_base);
 833     av_freep(&s->mbintra_table);
 834     av_freep(&s->cbp_table);
 835     av_freep(&s->pred_dir_table);
 836
 837     av_freep(&s->mbskip_table);
 838     av_freep(&s->prev_pict_types);
 839     av_freep(&s->bitstream_buffer);
 840     av_freep(&s->avctx->stats_out);
 841     av_freep(&s->ac_stats);
 842     av_freep(&s->error_status_table);
 843     av_freep(&s->mb_index2xy);
 844     av_freep(&s->lambda_table);
 845     av_freep(&s->q_intra_matrix);
 846     av_freep(&s->q_inter_matrix);
 847     av_freep(&s->q_intra_matrix16);
 848     av_freep(&s->q_inter_matrix16);
 849     av_freep(&s->input_picture);
 850     av_freep(&s->reordered_input_picture);
 851     av_freep(&s->dct_offset);
 852
 853     if(s->picture){
 854         for(i=0; i<MAX_PICTURE_COUNT; i++){
 855             free_picture(s, &s->picture[i]);
 856         }
 857     }
 858     av_freep(&s->picture);
 859     s->context_initialized = 0;
 860     s->last_picture_ptr=
 861     s->next_picture_ptr=
 862     s->current_picture_ptr= NULL;
 863
 864     for(i=0; i<3; i++)
 865         av_freep(&s->visualization_buffer[i]);
 866 }
 867
 868 #ifdef CONFIG_ENCODERS
 869
 870 /* init video encoder */
 871 int MPV_encode_init(AVCodecContext *avctx)
 872 {
 873     MpegEncContext *s = avctx->priv_data;
 874     int i, dummy;
 875     int chroma_h_shift, chroma_v_shift;
 876
 877     MPV_encode_defaults(s);
 878
 879     avctx->pix_fmt = PIX_FMT_YUV420P; // FIXME
 880
 881     s->bit_rate = avctx->bit_rate;
 882     s->width = avctx->width;
 883     s->height = avctx->height;
 884     if(avctx->gop_size > 600){
 885         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
 886         avctx->gop_size=600;
 887     }
 888     s->gop_size = avctx->gop_size;
 889     s->avctx = avctx;
 890     s->flags= avctx->flags;
 891     s->flags2= avctx->flags2;
 892     s->max_b_frames= avctx->max_b_frames;
 893     s->codec_id= avctx->codec->id;
 894     s->luma_elim_threshold  = avctx->luma_elim_threshold;
 895     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
 896     s->strict_std_compliance= avctx->strict_std_compliance;
 897     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
 898     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
 899     s->mpeg_quant= avctx->mpeg_quant;
 900     s->rtp_mode= !!avctx->rtp_payload_size;
 901     s->intra_dc_precision= avctx->intra_dc_precision;
 902
 903     if (s->gop_size <= 1) {
 904         s->intra_only = 1;
 905         s->gop_size = 12;
 906     } else {
 907         s->intra_only = 0;
 908     }
 909
 910     s->me_method = avctx->me_method;
 911
 912     /* Fixed QSCALE */
 913     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 914
 915     s->adaptive_quant= (   s->avctx->lumi_masking
 916                         || s->avctx->dark_masking
 917                         || s->avctx->temporal_cplx_masking
 918                         || s->avctx->spatial_cplx_masking
 919                         || s->avctx->p_masking
 920                         || (s->flags&CODEC_FLAG_QP_RD))
 921                        && !s->fixed_qscale;
 922
 923     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
 924     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 925     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
 926
 927     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
 928         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
 929         return -1;
 930     }
 931
 932     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
 933         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isnt recommanded!\n");
 934     }
 935
 936     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
 937         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
 938         return -1;
 939     }
 940
 941     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
 942         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
 943         return -1;
 944     }
 945
 946     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
 947        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
 948        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
 949
 950         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
 951     }
 952
 953     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
 954        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
 955         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 956         return -1;
 957     }
 958
 959     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
 960         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decission\n");
 961         return -1;
 962     }
 963
 964     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
 965         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
 966         return -1;
 967     }
 968
 969     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
 970         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 971         return -1;
 972     }
 973
 974     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
 975         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
 976         return -1;
 977     }
 978
 979     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
 980         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 981         return -1;
 982     }
 983
 984     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
 985        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
 986         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 987         return -1;
 988     }
 989
 990     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
 991         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supporetd by codec\n");
 992         return -1;
 993     }
 994
 995     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
 996         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 997         return -1;
 998     }
 999
1000     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1001         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1002         return -1;
1003     }
1004
1005     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1006         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1007         return -1;
1008     }
1009
1010     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1011        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1012        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1013         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1014         return -1;
1015     }
1016
1017     if(s->avctx->thread_count > 1)
1018         s->rtp_mode= 1;
1019
1020     i= ff_gcd(avctx->frame_rate, avctx->frame_rate_base);
1021     if(i > 1){
1022         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1023         avctx->frame_rate /= i;
1024         avctx->frame_rate_base /= i;
1025 //        return -1;
1026     }
1027
1028     if(s->codec_id==CODEC_ID_MJPEG){
1029         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1030         s->inter_quant_bias= 0;
1031     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1032         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1033         s->inter_quant_bias= 0;
1034     }else{
1035         s->intra_quant_bias=0;
1036         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1037     }
1038
1039     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1040         s->intra_quant_bias= avctx->intra_quant_bias;
1041     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1042         s->inter_quant_bias= avctx->inter_quant_bias;
1043
1044     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1045
1046     av_reduce(&s->time_increment_resolution, &dummy, s->avctx->frame_rate, s->avctx->frame_rate_base, (1<<16)-1);
1047     s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
1048
1049     switch(avctx->codec->id) {
1050     case CODEC_ID_MPEG1VIDEO:
1051         s->out_format = FMT_MPEG1;
1052         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1053         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1054         break;
1055     case CODEC_ID_MPEG2VIDEO:
1056         s->out_format = FMT_MPEG1;
1057         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1058         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1059         s->rtp_mode= 1;
1060         break;
1061     case CODEC_ID_LJPEG:
1062     case CODEC_ID_MJPEG:
1063         s->out_format = FMT_MJPEG;
1064         s->intra_only = 1; /* force intra only for jpeg */
1065         s->mjpeg_write_tables = 1; /* write all tables */
1066         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1067         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1068         s->mjpeg_vsample[1] = 1;
1069         s->mjpeg_vsample[2] = 1;
1070         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1071         s->mjpeg_hsample[1] = 1;
1072         s->mjpeg_hsample[2] = 1;
1073         if (mjpeg_init(s) < 0)
1074             return -1;
1075         avctx->delay=0;
1076         s->low_delay=1;
1077         break;
1078 #ifdef CONFIG_RISKY
1079     case CODEC_ID_H263:
1080         if (h263_get_picture_format(s->width, s->height) == 7) {
1081             av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
1082             return -1;
1083         }
1084         s->out_format = FMT_H263;
1085         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1086         avctx->delay=0;
1087         s->low_delay=1;
1088         break;
1089     case CODEC_ID_H263P:
1090         s->out_format = FMT_H263;
1091         s->h263_plus = 1;
1092         /* Fx */
1093         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1094         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1095         s->modified_quant= s->h263_aic;
1096         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1097         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1098         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1099         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1100         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1101
1102         /* /Fx */
1103         /* These are just to be sure */
1104         avctx->delay=0;
1105         s->low_delay=1;
1106         break;
1107     case CODEC_ID_FLV1:
1108         s->out_format = FMT_H263;
1109         s->h263_flv = 2; /* format = 1; 11-bit codes */
1110         s->unrestricted_mv = 1;
1111         s->rtp_mode=0; /* don't allow GOB */
1112         avctx->delay=0;
1113         s->low_delay=1;
1114         break;
1115     case CODEC_ID_RV10:
1116         s->out_format = FMT_H263;
1117         avctx->delay=0;
1118         s->low_delay=1;
1119         break;
1120     case CODEC_ID_MPEG4:
1121         s->out_format = FMT_H263;
1122         s->h263_pred = 1;
1123         s->unrestricted_mv = 1;
1124         s->low_delay= s->max_b_frames ? 0 : 1;
1125         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1126         break;
1127     case CODEC_ID_MSMPEG4V1:
1128         s->out_format = FMT_H263;
1129         s->h263_msmpeg4 = 1;
1130         s->h263_pred = 1;
1131         s->unrestricted_mv = 1;
1132         s->msmpeg4_version= 1;
1133         avctx->delay=0;
1134         s->low_delay=1;
1135         break;
1136     case CODEC_ID_MSMPEG4V2:
1137         s->out_format = FMT_H263;
1138         s->h263_msmpeg4 = 1;
1139         s->h263_pred = 1;
1140         s->unrestricted_mv = 1;
1141         s->msmpeg4_version= 2;
1142         avctx->delay=0;
1143         s->low_delay=1;
1144         break;
1145     case CODEC_ID_MSMPEG4V3:
1146         s->out_format = FMT_H263;
1147         s->h263_msmpeg4 = 1;
1148         s->h263_pred = 1;
1149         s->unrestricted_mv = 1;
1150         s->msmpeg4_version= 3;
1151         s->flipflop_rounding=1;
1152         avctx->delay=0;
1153         s->low_delay=1;
1154         break;
1155     case CODEC_ID_WMV1:
1156         s->out_format = FMT_H263;
1157         s->h263_msmpeg4 = 1;
1158         s->h263_pred = 1;
1159         s->unrestricted_mv = 1;
1160         s->msmpeg4_version= 4;
1161         s->flipflop_rounding=1;
1162         avctx->delay=0;
1163         s->low_delay=1;
1164         break;
1165     case CODEC_ID_WMV2:
1166         s->out_format = FMT_H263;
1167         s->h263_msmpeg4 = 1;
1168         s->h263_pred = 1;
1169         s->unrestricted_mv = 1;
1170         s->msmpeg4_version= 5;
1171         s->flipflop_rounding=1;
1172         avctx->delay=0;
1173         s->low_delay=1;
1174         break;
1175 #endif
1176     default:
1177         return -1;
1178     }
1179
1180     avctx->has_b_frames= !s->low_delay;
1181
1182     s->encoding = 1;
1183
1184     /* init */
1185     if (MPV_common_init(s) < 0)
1186         return -1;
1187
1188     if(s->modified_quant)
1189         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1190     s->progressive_frame=
1191     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1192     s->quant_precision=5;
1193
1194     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1195
1196 #ifdef CONFIG_ENCODERS
1197 #ifdef CONFIG_RISKY
1198     if (s->out_format == FMT_H263)
1199         h263_encode_init(s);
1200     if(s->msmpeg4_version)
1201         ff_msmpeg4_encode_init(s);
1202 #endif
1203     if (s->out_format == FMT_MPEG1)
1204         ff_mpeg1_encode_init(s);
1205 #endif
1206
1207     /* init q matrix */
1208     for(i=0;i<64;i++) {
1209         int j= s->dsp.idct_permutation[i];
1210 #ifdef CONFIG_RISKY
1211         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1212             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1213             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1214         }else if(s->out_format == FMT_H263){
1215             s->intra_matrix[j] =
1216             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1217         }else
1218 #endif
1219         { /* mpeg1/2 */
1220             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1221             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1222         }
1223         if(s->avctx->intra_matrix)
1224             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1225         if(s->avctx->inter_matrix)
1226             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1227     }
1228
1229     /* precompute matrix */
1230     /* for mjpeg, we do include qscale in the matrix */
1231     if (s->out_format != FMT_MJPEG) {
1232         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1233                        s->intra_matrix, s->intra_quant_bias, 1, 31);
1234         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1235                        s->inter_matrix, s->inter_quant_bias, 1, 31);
1236     }
1237
1238     if(ff_rate_control_init(s) < 0)
1239         return -1;
1240
1241     return 0;
1242 }
1243
1244 int MPV_encode_end(AVCodecContext *avctx)
1245 {
1246     MpegEncContext *s = avctx->priv_data;
1247
1248 #ifdef STATS
1249     print_stats();
1250 #endif
1251
1252     ff_rate_control_uninit(s);
1253
1254     MPV_common_end(s);
1255     if (s->out_format == FMT_MJPEG)
1256         mjpeg_close(s);
1257
1258     av_freep(&avctx->extradata);
1259
1260     return 0;
1261 }
1262
1263 #endif //CONFIG_ENCODERS
1264
1265 void init_rl(RLTable *rl)
1266 {
1267     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1268     uint8_t index_run[MAX_RUN+1];
1269     int last, run, level, start, end, i;
1270
1271     /* compute max_level[], max_run[] and index_run[] */
1272     for(last=0;last<2;last++) {
1273         if (last == 0) {
1274             start = 0;
1275             end = rl->last;
1276         } else {
1277             start = rl->last;
1278             end = rl->n;
1279         }
1280
1281         memset(max_level, 0, MAX_RUN + 1);
1282         memset(max_run, 0, MAX_LEVEL + 1);
1283         memset(index_run, rl->n, MAX_RUN + 1);
1284         for(i=start;i<end;i++) {
1285             run = rl->table_run[i];
1286             level = rl->table_level[i];
1287             if (index_run[run] == rl->n)
1288                 index_run[run] = i;
1289             if (level > max_level[run])
1290                 max_level[run] = level;
1291             if (run > max_run[level])
1292                 max_run[level] = run;
1293         }
1294         rl->max_level[last] = av_malloc(MAX_RUN + 1);
1295         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1296         rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1297         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1298         rl->index_run[last] = av_malloc(MAX_RUN + 1);
1299         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1300     }
1301 }
1302
/**
 * Pad an image by replicating its border pixels outwards.
 *
 * Writes w extra rows above and below the image and w extra columns left
 * and right of it (including the four w x w corners), so the caller must
 * own at least w rows/columns of memory around buf in every direction.
 * Nothing is written if width, height or w is not positive.
 *
 * @param buf    top-left pixel of the image
 * @param wrap   distance in bytes between two consecutive rows
 * @param width  image width in pixels
 * @param height image height in pixels
 * @param w      width of the edge to draw
 */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *row, *last_line;
    int i;

    /* a degenerate image has no border pixel to replicate; without this
       guard buf[width-1] / last_line would be read outside the image */
    if (width <= 0 || height <= 0 || w <= 0)
        return;

    last_line = buf + (height - 1) * wrap;

    /* top and bottom: copy the first/last row outwards */
    for (i = 1; i <= w; i++) {
        memcpy(buf       - i * wrap, buf,       width);
        memcpy(last_line + i * wrap, last_line, width);
    }

    /* left and right: smear the first/last pixel of every row */
    row = buf;
    for (i = 0; i < height; i++) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* corners: filled with the nearest corner pixel of the image */
    for (i = 1; i <= w; i++) {
        memset(buf       - i * wrap - w,     buf[0],               w); /* top left */
        memset(buf       - i * wrap + width, buf[width - 1],       w); /* top right */
        memset(last_line + i * wrap - w,     last_line[0],         w); /* bottom left */
        memset(last_line + i * wrap + width, last_line[width - 1], w); /* bottom right */
    }
}
1331
1332 int ff_find_unused_picture(MpegEncContext *s, int shared){
1333     int i;
1334
1335     if(shared){
1336         for(i=0; i<MAX_PICTURE_COUNT; i++){
1337             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1338         }
1339     }else{
1340         for(i=0; i<MAX_PICTURE_COUNT; i++){
1341             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1342         }
1343         for(i=0; i<MAX_PICTURE_COUNT; i++){
1344             if(s->picture[i].data[0]==NULL) return i;
1345         }
1346     }
1347
1348     assert(0);
1349     return -1;
1350 }
1351
1352 static void update_noise_reduction(MpegEncContext *s){
1353     int intra, i;
1354
1355     for(intra=0; intra<2; intra++){
1356         if(s->dct_count[intra] > (1<<16)){
1357             for(i=0; i<64; i++){
1358                 s->dct_error_sum[intra][i] >>=1;
1359             }
1360             s->dct_count[intra] >>= 1;
1361         }
1362
1363         for(i=0; i<64; i++){
1364             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1365         }
1366     }
1367 }
1368
1369 /**
1370  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1371  */
1372 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1373 {
1374     int i;
1375     AVFrame *pic;
1376     s->mb_skiped = 0;
1377
1378     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1379
1380     /* mark&release old frames */
1381     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1382         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1383
1384         /* release forgotten pictures */
1385         /* if(mpeg124/h263) */
1386         if(!s->encoding){
1387             for(i=0; i<MAX_PICTURE_COUNT; i++){
1388                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1389                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1390                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1391                 }
1392             }
1393         }
1394     }
1395 alloc:
1396     if(!s->encoding){
1397         /* release non refernce frames */
1398         for(i=0; i<MAX_PICTURE_COUNT; i++){
1399             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1400                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1401             }
1402         }
1403
1404         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1405             pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1406         else{
1407             i= ff_find_unused_picture(s, 0);
1408             pic= (AVFrame*)&s->picture[i];
1409         }
1410
1411         pic->reference= s->pict_type != B_TYPE && !s->dropable ? 3 : 0;
1412
1413         pic->coded_picture_number= s->coded_picture_number++;
1414
1415         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1416             return -1;
1417
1418         s->current_picture_ptr= (Picture*)pic;
1419         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1420         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1421     }
1422
1423     s->current_picture_ptr->pict_type= s->pict_type;
1424 //    if(s->flags && CODEC_FLAG_QSCALE)
1425   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1426     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1427
1428     copy_picture(&s->current_picture, s->current_picture_ptr);
1429
1430   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1431     if (s->pict_type != B_TYPE) {
1432         s->last_picture_ptr= s->next_picture_ptr;
1433         if(!s->dropable)
1434             s->next_picture_ptr= s->current_picture_ptr;
1435     }
1436 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1437         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1438         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1439         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1440         s->pict_type, s->dropable);*/
1441
1442     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1443     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1444
1445     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1446         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1447         assert(s->pict_type != B_TYPE); //these should have been dropped if we dont have a reference
1448         goto alloc;
1449     }
1450
1451     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1452
1453     if(s->picture_structure!=PICT_FRAME){
1454         int i;
1455         for(i=0; i<4; i++){
1456             if(s->picture_structure == PICT_BOTTOM_FIELD){
1457                  s->current_picture.data[i] += s->current_picture.linesize[i];
1458             }
1459             s->current_picture.linesize[i] *= 2;
1460             s->last_picture.linesize[i] *=2;
1461             s->next_picture.linesize[i] *=2;
1462         }
1463     }
1464   }
1465
1466     s->hurry_up= s->avctx->hurry_up;
1467     s->error_resilience= avctx->error_resilience;
1468
1469     /* set dequantizer, we cant do it during init as it might change for mpeg4
1470        and we cant do it in the header decode as init isnt called for mpeg4 there yet */
1471     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1472         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1473         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1474     }else if(s->out_format == FMT_H263){
1475         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1476         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1477     }else if(s->out_format == FMT_H261){
1478         s->dct_unquantize_intra = s->dct_unquantize_h261_intra;
1479         s->dct_unquantize_inter = s->dct_unquantize_h261_inter;
1480     }else{
1481         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1482         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1483     }
1484
1485     if(s->dct_error_sum){
1486         assert(s->avctx->noise_reduction && s->encoding);
1487
1488         update_noise_reduction(s);
1489     }
1490
1491 #ifdef HAVE_XVMC
1492     if(s->avctx->xvmc_acceleration)
1493         return XVMC_field_start(s, avctx);
1494 #endif
1495     return 0;
1496 }
1497
1498 /* generic function for encode/decode called after a frame has been coded/decoded */
1499 void MPV_frame_end(MpegEncContext *s)
1500 {
1501     int i;
1502     /* draw edge for correct motion prediction if outside */
1503 #ifdef HAVE_XVMC
1504 //just to make sure that all data is rendered.
1505     if(s->avctx->xvmc_acceleration){
1506         XVMC_field_end(s);
1507     }else
1508 #endif
1509     if(s->unrestricted_mv && s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1510             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1511             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1512             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1513     }
1514     emms_c();
1515
1516     s->last_pict_type    = s->pict_type;
1517     if(s->pict_type!=B_TYPE){
1518         s->last_non_b_pict_type= s->pict_type;
1519     }
1520 #if 0
1521         /* copy back current_picture variables */
1522     for(i=0; i<MAX_PICTURE_COUNT; i++){
1523         if(s->picture[i].data[0] == s->current_picture.data[0]){
1524             s->picture[i]= s->current_picture;
1525             break;
1526         }
1527     }
1528     assert(i<MAX_PICTURE_COUNT);
1529 #endif
1530
1531     if(s->encoding){
1532         /* release non refernce frames */
1533         for(i=0; i<MAX_PICTURE_COUNT; i++){
1534             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1535                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1536             }
1537         }
1538     }
1539     // clear copies, to avoid confusion
1540 #if 0
1541     memset(&s->last_picture, 0, sizeof(Picture));
1542     memset(&s->next_picture, 0, sizeof(Picture));
1543     memset(&s->current_picture, 0, sizeof(Picture));
1544 #endif
1545 }
1546
/**
 * Draws a line from (sx, sy) to (ex, ey) by adding color to the pixels it
 * touches.
 *
 * Both end points are passed through clip() with the ranges [0, w-1] and
 * [0, h-1] first. The line is walked along its longer axis; at every step
 * the color is split between the two neighbouring pixels on the other axis
 * according to the 16 bit fractional position.
 *
 * @param buf    top-left pixel of the image
 * @param sx     x coordinate of the start point
 * @param sy     y coordinate of the start point
 * @param ex     x coordinate of the end point
 * @param ey     y coordinate of the end point
 * @param w      width of the image
 * @param h      height of the image
 * @param stride stride/linesize of the image
 * @param color  color of the line
 */
static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int t, x, y, fr, f;

    sx= clip(sx, 0, w-1);
    sy= clip(sy, 0, h-1);
    ex= clip(ex, 0, w-1);
    ey= clip(ey, 0, h-1);

    buf[sy*stride + sx]+= color;

    if(ABS(ex - sx) > ABS(ey - sy)){
        /* mostly horizontal: step in x, walk left to right */
        if(sx > ex){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ex-= sx;
        f= ((ey-sy)<<16)/ex; /* ex != 0 here, the x distance is the larger one */
        for(x= 0; x <= ex; x++){
            y = (x*f)>>16;
            fr= (x*f)&0xFFFF;
            buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
            /* with fr == 0 the second tap contributes nothing, and row y+1
               may lie below the image when the line runs along the last row */
            if(fr) buf[(y+1)*stride + x]+= (color*         fr )>>16;
        }
    }else{
        /* mostly vertical: step in y, walk top to bottom */
        if(sy > ey){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ey-= sy;
        if(ey) f= ((ex-sx)<<16)/ey;
        else   f= 0;
        for(y= 0; y <= ey; y++){
            x = (y*f)>>16;
            fr= (y*f)&0xFFFF;
            buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
            /* same as above for column x+1 right of the image */
            if(fr) buf[y*stride + x+1]+= (color*         fr )>>16;
        }
    }
}
1595
/**
 * Draws an arrow: a line between (sx, sy) and (ex, ey) with the two strokes
 * of the arrow head attached at (sx, sy).
 * The coordinates are limited to at most 100 pixels outside of the image;
 * the head is left out if the two points are not more than 3 pixels apart.
 * @param buf plane that is drawn into, passed on to draw_line()
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int delta_x, delta_y;

    sx= clip(sx, -100, w+100);
    sy= clip(sy, -100, h+100);
    ex= clip(ex, -100, w+100);
    ey= clip(ey, -100, h+100);

    delta_x= ex - sx;
    delta_y= ey - sy;

    if(delta_x*delta_x + delta_y*delta_y > 3*3){
        /* direction of the shaft rotated by 45 degrees (not normalized yet) */
        int head_x= delta_x + delta_y;
        int head_y= delta_y - delta_x;
        /* presumably 16 times the euclidean length, because of the <<8 under the root */
        const int length= ff_sqrt((head_x*head_x + head_y*head_y)<<8);

        //FIXME subpixel accuracy
        head_x= ROUNDED_DIV(head_x*3<<4, length);
        head_y= ROUNDED_DIV(head_y*3<<4, length);

        /* the second stroke is the first one turned by 90 degrees */
        draw_line(buf, sx, sy, sx + head_x, sy + head_y, w, h, stride, color);
        draw_line(buf, sx, sy, sx - head_y, sy + head_x, w, h, stride, color);
    }

    /* the shaft itself */
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1628
1629 /**
1630  * prints debuging info for the given picture.
1631  */
1632 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1633
1634     if(!pict || !pict->mb_type) return;
1635
1636     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1637         int x,y;
1638
1639         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1640         switch (pict->pict_type) {
1641             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1642             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1643             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1644             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1645             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1646             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1647         }
1648         for(y=0; y<s->mb_height; y++){
1649             for(x=0; x<s->mb_width; x++){
1650                 if(s->avctx->debug&FF_DEBUG_SKIP){
1651                     int count= s->mbskip_table[x + y*s->mb_stride];
1652                     if(count>9) count=9;
1653                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1654                 }
1655                 if(s->avctx->debug&FF_DEBUG_QP){
1656                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1657                 }
1658                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1659                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1660                     //Type & MV direction
1661                     if(IS_PCM(mb_type))
1662                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1663                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1664                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1665                     else if(IS_INTRA4x4(mb_type))
1666                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1667                     else if(IS_INTRA16x16(mb_type))
1668                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1669                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1670                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1671                     else if(IS_DIRECT(mb_type))
1672                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1673                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1674                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1675                     else if(IS_GMC(mb_type))
1676                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1677                     else if(IS_SKIP(mb_type))
1678                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1679                     else if(!USES_LIST(mb_type, 1))
1680                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1681                     else if(!USES_LIST(mb_type, 0))
1682                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1683                     else{
1684                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1685                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1686                     }
1687
1688                     //segmentation
1689                     if(IS_8X8(mb_type))
1690                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1691                     else if(IS_16X8(mb_type))
1692                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1693                     else if(IS_8X16(mb_type))
1694                         av_log(s->avctx, AV_LOG_DEBUG, "¦");
1695                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1696                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1697                     else
1698                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1699
1700
1701                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1702                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1703                     else
1704                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1705                 }
1706 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1707             }
1708             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1709         }
1710     }
1711
1712     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1713         const int shift= 1 + s->quarter_sample;
1714         int mb_y;
1715         uint8_t *ptr;
1716         int i;
1717         int h_chroma_shift, v_chroma_shift;
1718         s->low_delay=0; //needed to see the vectors without trashing the buffers
1719
1720         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1721         for(i=0; i<3; i++){
1722             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*s->height:pict->linesize[i]*s->height >> v_chroma_shift);
1723             pict->data[i]= s->visualization_buffer[i];
1724         }
1725         pict->type= FF_BUFFER_TYPE_COPY;
1726         ptr= pict->data[0];
1727
1728         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1729             int mb_x;
1730             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1731                 const int mb_index= mb_x + mb_y*s->mb_stride;
1732                 if((s->avctx->debug_mv) && pict->motion_val){
1733                   int type;
1734                   for(type=0; type<3; type++){
1735                     int direction = 0;
1736                     switch (type) {
1737                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1738                                 continue;
1739                               direction = 0;
1740                               break;
1741                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1742                                 continue;
1743                               direction = 0;
1744                               break;
1745                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1746                                 continue;
1747                               direction = 1;
1748                               break;
1749                     }
1750                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1751                         continue;
1752
1753                     //FIXME for h264
1754                     if(IS_8X8(pict->mb_type[mb_index])){
1755                       int i;
1756                       for(i=0; i<4; i++){
1757                         int sx= mb_x*16 + 4 + 8*(i&1);
1758                         int sy= mb_y*16 + 4 + 8*(i>>1);
1759                         int xy= mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*s->b8_stride;
1760                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1761                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1762                         draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1763                       }
1764                     }else if(IS_16X8(pict->mb_type[mb_index])){
1765                       int i;
1766                       for(i=0; i<2; i++){
1767                         int sx=mb_x*16 + 8;
1768                         int sy=mb_y*16 + 4 + 8*i;
1769                         int xy= mb_x*2 + (mb_y*2 + i)*s->b8_stride;
1770                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1771                         int my=(pict->motion_val[direction][xy][1]>>shift);
1772
1773                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1774                             my*=2;
1775
1776                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, s->width, s->height, s->linesize, 100);
1777                       }
1778                     }else{
1779                       int sx= mb_x*16 + 8;
1780                       int sy= mb_y*16 + 8;
1781                       int xy= mb_x*2 + mb_y*2*s->b8_stride;
1782                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1783                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1784                       draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1785                     }
1786                   }
1787                 }
1788                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1789                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1790                     int y;
1791                     for(y=0; y<8; y++){
1792                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1793                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1794                     }
1795                 }
1796                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1797                     int mb_type= pict->mb_type[mb_index];
1798                     uint64_t u,v;
1799                     int y;
1800 #define COLOR(theta, r)\
1801 u= (int)(128 + r*cos(theta*3.141592/180));\
1802 v= (int)(128 + r*sin(theta*3.141592/180));
1803
1804
1805                     u=v=128;
1806                     if(IS_PCM(mb_type)){
1807                         COLOR(120,48)
1808                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1809                         COLOR(30,48)
1810                     }else if(IS_INTRA4x4(mb_type)){
1811                         COLOR(90,48)
1812                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1813 //                        COLOR(120,48)
1814                     }else if(IS_DIRECT(mb_type)){
1815                         COLOR(150,48)
1816                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1817                         COLOR(170,48)
1818                     }else if(IS_GMC(mb_type)){
1819                         COLOR(190,48)
1820                     }else if(IS_SKIP(mb_type)){
1821 //                        COLOR(180,48)
1822                     }else if(!USES_LIST(mb_type, 1)){
1823                         COLOR(240,48)
1824                     }else if(!USES_LIST(mb_type, 0)){
1825                         COLOR(0,48)
1826                     }else{
1827                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1828                         COLOR(300,48)
1829                     }
1830
1831                     u*= 0x0101010101010101ULL;
1832                     v*= 0x0101010101010101ULL;
1833                     for(y=0; y<8; y++){
1834                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1835                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1836                     }
1837
1838                     //segmentation
1839                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1840                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1841                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1842                     }
1843                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1844                         for(y=0; y<16; y++)
1845                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1846                     }
1847
1848                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1849                         // hmm
1850                     }
1851                 }
1852                 s->mbskip_table[mb_index]=0;
1853             }
1854         }
1855     }
1856 }
1857
1858 #ifdef CONFIG_ENCODERS
1859
/**
 * Sums the absolute differences between every pixel of a 16x16 block and
 * one reference value.
 * @param src top left pixel of the block
 * @param ref value each pixel is compared against
 * @param stride distance in bytes between two rows of the block
 * @return the sum over all 256 pixels
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int row, col;
    int sum=0;

    for(row=0; row<16; row++){
        uint8_t *line= src + row*stride;

        for(col=0; col<16; col++)
            sum+= ABS(line[col] - ref);
    }

    return sum;
}
1872
1873 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1874     int x, y, w, h;
1875     int acc=0;
1876
1877     w= s->width &~15;
1878     h= s->height&~15;
1879
1880     for(y=0; y<h; y+=16){
1881         for(x=0; x<w; x+=16){
1882             int offset= x + y*stride;
1883             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1884             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1885             int sae = get_sae(src + offset, mean, stride);
1886
1887             acc+= sae + 500 < sad;
1888         }
1889     }
1890     return acc;
1891 }
1892
1893
1894 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
1895     AVFrame *pic=NULL;
1896     int i;
1897     const int encoding_delay= s->max_b_frames;
1898     int direct=1;
1899
1900   if(pic_arg){
1901     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
1902     if(pic_arg->linesize[0] != s->linesize) direct=0;
1903     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
1904     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
1905
1906 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
1907
1908     if(direct){
1909         i= ff_find_unused_picture(s, 1);
1910
1911         pic= (AVFrame*)&s->picture[i];
1912         pic->reference= 3;
1913
1914         for(i=0; i<4; i++){
1915             pic->data[i]= pic_arg->data[i];
1916             pic->linesize[i]= pic_arg->linesize[i];
1917         }
1918         alloc_picture(s, (Picture*)pic, 1);
1919     }else{
1920         int offset= 16;
1921         i= ff_find_unused_picture(s, 0);
1922
1923         pic= (AVFrame*)&s->picture[i];
1924         pic->reference= 3;
1925
1926         alloc_picture(s, (Picture*)pic, 0);
1927
1928         if(   pic->data[0] + offset == pic_arg->data[0]
1929            && pic->data[1] + offset == pic_arg->data[1]
1930            && pic->data[2] + offset == pic_arg->data[2]){
1931        // empty
1932         }else{
1933             int h_chroma_shift, v_chroma_shift;
1934             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1935
1936             for(i=0; i<3; i++){
1937                 int src_stride= pic_arg->linesize[i];
1938                 int dst_stride= i ? s->uvlinesize : s->linesize;
1939                 int h_shift= i ? h_chroma_shift : 0;
1940                 int v_shift= i ? v_chroma_shift : 0;
1941                 int w= s->width >>h_shift;
1942                 int h= s->height>>v_shift;
1943                 uint8_t *src= pic_arg->data[i];
1944                 uint8_t *dst= pic->data[i] + offset;
1945
1946                 if(src_stride==dst_stride)
1947                     memcpy(dst, src, src_stride*h);
1948                 else{
1949                     while(h--){
1950                         memcpy(dst, src, w);
1951                         dst += dst_stride;
1952                         src += src_stride;
1953                     }
1954                 }
1955             }
1956         }
1957     }
1958     copy_picture_attributes(s, pic, pic_arg);
1959
1960     pic->display_picture_number= s->input_picture_number++;
1961     if(pic->pts != AV_NOPTS_VALUE){
1962         s->user_specified_pts= pic->pts;
1963     }else{
1964         if(s->user_specified_pts){
1965             pic->pts= s->user_specified_pts + AV_TIME_BASE*(int64_t)s->avctx->frame_rate_base / s->avctx->frame_rate;
1966             av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%Ld)\n", pic->pts);
1967         }else{
1968             pic->pts= av_rescale(pic->display_picture_number*(int64_t)s->avctx->frame_rate_base, AV_TIME_BASE, s->avctx->frame_rate);
1969         }
1970     }
1971   }
1972
1973     /* shift buffer entries */
1974     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
1975         s->input_picture[i-1]= s->input_picture[i];
1976
1977     s->input_picture[encoding_delay]= (Picture*)pic;
1978
1979     return 0;
1980 }
1981
1982 static void select_input_picture(MpegEncContext *s){
1983     int i;
1984
1985     for(i=1; i<MAX_PICTURE_COUNT; i++)
1986         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
1987     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
1988
1989     /* set next picture types & ordering */
1990     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
1991         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
1992             s->reordered_input_picture[0]= s->input_picture[0];
1993             s->reordered_input_picture[0]->pict_type= I_TYPE;
1994             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
1995         }else{
1996             int b_frames;
1997
1998             if(s->flags&CODEC_FLAG_PASS2){
1999                 for(i=0; i<s->max_b_frames+1; i++){
2000                     int pict_num= s->input_picture[0]->display_picture_number + i;
2001                     int pict_type= s->rc_context.entry[pict_num].new_pict_type;
2002                     s->input_picture[i]->pict_type= pict_type;
2003
2004                     if(i + 1 >= s->rc_context.num_entries) break;
2005                 }
2006             }
2007
2008             if(s->input_picture[0]->pict_type){
2009                 /* user selected pict_type */
2010                 for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){
2011                     if(s->input_picture[b_frames]->pict_type!=B_TYPE) break;
2012                 }
2013
2014                 if(b_frames > s->max_b_frames){
2015                     av_log(s->avctx, AV_LOG_ERROR, "warning, too many bframes in a row\n");
2016                     b_frames = s->max_b_frames;
2017                 }
2018             }else if(s->avctx->b_frame_strategy==0){
2019                 b_frames= s->max_b_frames;
2020                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2021             }else if(s->avctx->b_frame_strategy==1){
2022                 for(i=1; i<s->max_b_frames+1; i++){
2023                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2024                         s->input_picture[i]->b_frame_score=
2025                             get_intra_count(s, s->input_picture[i  ]->data[0],
2026                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2027                     }
2028                 }
2029                 for(i=0; i<s->max_b_frames; i++){
2030                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2031                 }
2032
2033                 b_frames= FFMAX(0, i-1);
2034
2035                 /* reset scores */
2036                 for(i=0; i<b_frames+1; i++){
2037                     s->input_picture[i]->b_frame_score=0;
2038                 }
2039             }else{
2040                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2041                 b_frames=0;
2042             }
2043
2044             emms_c();
2045 //static int b_count=0;
2046 //b_count+= b_frames;
2047 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2048             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2049                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2050                     b_frames=0;
2051                 s->input_picture[b_frames]->pict_type= I_TYPE;
2052             }
2053
2054             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2055                && b_frames
2056                && s->input_picture[b_frames]->pict_type== I_TYPE)
2057                 b_frames--;
2058
2059             s->reordered_input_picture[0]= s->input_picture[b_frames];
2060             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2061                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2062             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2063             for(i=0; i<b_frames; i++){
2064                 s->reordered_input_picture[i+1]= s->input_picture[i];
2065                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2066                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2067             }
2068         }
2069     }
2070
2071     if(s->reordered_input_picture[0]){
2072         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2073
2074         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2075
2076         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2077             // input is a shared pix, so we cant modifiy it -> alloc a new one & ensure that the shared one is reuseable
2078
2079             int i= ff_find_unused_picture(s, 0);
2080             Picture *pic= &s->picture[i];
2081
2082             /* mark us unused / free shared pic */
2083             for(i=0; i<4; i++)
2084                 s->reordered_input_picture[0]->data[i]= NULL;
2085             s->reordered_input_picture[0]->type= 0;
2086
2087             pic->reference              = s->reordered_input_picture[0]->reference;
2088
2089             alloc_picture(s, pic, 0);
2090
2091             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2092
2093             s->current_picture_ptr= pic;
2094         }else{
2095             // input is not a shared pix -> reuse buffer for current_pix
2096
2097             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2098                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2099
2100             s->current_picture_ptr= s->reordered_input_picture[0];
2101             for(i=0; i<4; i++){
2102                 s->new_picture.data[i]+=16;
2103             }
2104         }
2105         copy_picture(&s->current_picture, s->current_picture_ptr);
2106
2107         s->picture_number= s->new_picture.display_picture_number;
2108 //printf("dpn:%d\n", s->picture_number);
2109     }else{
2110        memset(&s->new_picture, 0, sizeof(Picture));
2111     }
2112 }
2113
2114 int MPV_encode_picture(AVCodecContext *avctx,
2115                        unsigned char *buf, int buf_size, void *data)
2116 {
2117     MpegEncContext *s = avctx->priv_data;
2118     AVFrame *pic_arg = data;
2119     int i, stuffing_count;
2120
2121     if(avctx->pix_fmt != PIX_FMT_YUV420P){
2122         av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
2123         return -1;
2124     }
2125
2126     for(i=0; i<avctx->thread_count; i++){
2127         int start_y= s->thread_context[i]->start_mb_y;
2128         int   end_y= s->thread_context[i]->  end_mb_y;
2129         int h= s->mb_height;
2130         uint8_t *start= buf + buf_size*start_y/h;
2131         uint8_t *end  = buf + buf_size*  end_y/h;
2132
2133         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2134     }
2135
2136     s->picture_in_gop_number++;
2137
2138     load_input_picture(s, pic_arg);
2139
2140     select_input_picture(s);
2141
2142     /* output? */
2143     if(s->new_picture.data[0]){
2144         s->pict_type= s->new_picture.pict_type;
2145 //emms_c();
2146 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2147         MPV_frame_start(s, avctx);
2148
2149         encode_picture(s, s->picture_number);
2150
2151         avctx->real_pict_num  = s->picture_number;
2152         avctx->header_bits = s->header_bits;
2153         avctx->mv_bits     = s->mv_bits;
2154         avctx->misc_bits   = s->misc_bits;
2155         avctx->i_tex_bits  = s->i_tex_bits;
2156         avctx->p_tex_bits  = s->p_tex_bits;
2157         avctx->i_count     = s->i_count;
2158         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2159         avctx->skip_count  = s->skip_count;
2160
2161         MPV_frame_end(s);
2162
2163         if (s->out_format == FMT_MJPEG)
2164             mjpeg_picture_trailer(s);
2165
2166         if(s->flags&CODEC_FLAG_PASS1)
2167             ff_write_pass1_stats(s);
2168
2169         for(i=0; i<4; i++){
2170             avctx->error[i] += s->current_picture_ptr->error[i];
2171         }
2172
2173         flush_put_bits(&s->pb);
2174         s->frame_bits  = put_bits_count(&s->pb);
2175
2176         stuffing_count= ff_vbv_update(s, s->frame_bits);
2177         if(stuffing_count){
2178             switch(s->codec_id){
2179             case CODEC_ID_MPEG1VIDEO:
2180             case CODEC_ID_MPEG2VIDEO:
2181                 while(stuffing_count--){
2182                     put_bits(&s->pb, 8, 0);
2183                 }
2184             break;
2185             case CODEC_ID_MPEG4:
2186                 put_bits(&s->pb, 16, 0);
2187                 put_bits(&s->pb, 16, 0x1C3);
2188                 stuffing_count -= 4;
2189                 while(stuffing_count--){
2190                     put_bits(&s->pb, 8, 0xFF);
2191                 }
2192             break;
2193             default:
2194                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2195             }
2196             flush_put_bits(&s->pb);
2197             s->frame_bits  = put_bits_count(&s->pb);
2198         }
2199
2200         /* update mpeg1/2 vbv_delay for CBR */
2201         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2202            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2203             int vbv_delay;
2204
2205             assert(s->repeat_first_field==0);
2206
2207             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2208             assert(vbv_delay < 0xFFFF);
2209
2210             s->vbv_delay_ptr[0] &= 0xF8;
2211             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2212             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2213             s->vbv_delay_ptr[2] &= 0x07;
2214             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2215         }
2216         s->total_bits += s->frame_bits;
2217         avctx->frame_bits  = s->frame_bits;
2218     }else{
2219         assert((pbBufPtr(&s->pb) == s->pb.buf));
2220         s->frame_bits=0;
2221     }
2222     assert((s->frame_bits&7)==0);
2223
2224     return s->frame_bits/8;
2225 }
2226
2227 #endif //CONFIG_ENCODERS
2228
2229 static inline void gmc1_motion(MpegEncContext *s,
2230                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2231                                uint8_t **ref_picture)
2232 {
2233     uint8_t *ptr;
2234     int offset, src_x, src_y, linesize, uvlinesize;
2235     int motion_x, motion_y;
2236     int emu=0;
2237
2238     motion_x= s->sprite_offset[0][0];
2239     motion_y= s->sprite_offset[0][1];
2240     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2241     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2242     motion_x<<=(3-s->sprite_warping_accuracy);
2243     motion_y<<=(3-s->sprite_warping_accuracy);
2244     src_x = clip(src_x, -16, s->width);
2245     if (src_x == s->width)
2246         motion_x =0;
2247     src_y = clip(src_y, -16, s->height);
2248     if (src_y == s->height)
2249         motion_y =0;
2250
2251     linesize = s->linesize;
2252     uvlinesize = s->uvlinesize;
2253
2254     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2255
2256     if(s->flags&CODEC_FLAG_EMU_EDGE){
2257         if(   (unsigned)src_x >= s->h_edge_pos - 17
2258            || (unsigned)src_y >= s->v_edge_pos - 17){
2259             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2260             ptr= s->edge_emu_buffer;
2261         }
2262     }
2263
2264     if((motion_x|motion_y)&7){
2265         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2266         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2267     }else{
2268         int dxy;
2269
2270         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2271         if (s->no_rounding){
2272             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2273         }else{
2274             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2275         }
2276     }
2277
2278     if(s->flags&CODEC_FLAG_GRAY) return;
2279
2280     motion_x= s->sprite_offset[1][0];
2281     motion_y= s->sprite_offset[1][1];
2282     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2283     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2284     motion_x<<=(3-s->sprite_warping_accuracy);
2285     motion_y<<=(3-s->sprite_warping_accuracy);
2286     src_x = clip(src_x, -8, s->width>>1);
2287     if (src_x == s->width>>1)
2288         motion_x =0;
2289     src_y = clip(src_y, -8, s->height>>1);
2290     if (src_y == s->height>>1)
2291         motion_y =0;
2292
2293     offset = (src_y * uvlinesize) + src_x;
2294     ptr = ref_picture[1] + offset;
2295     if(s->flags&CODEC_FLAG_EMU_EDGE){
2296         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2297            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2298             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2299             ptr= s->edge_emu_buffer;
2300             emu=1;
2301         }
2302     }
2303     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2304
2305     ptr = ref_picture[2] + offset;
2306     if(emu){
2307         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2308         ptr= s->edge_emu_buffer;
2309     }
2310     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2311
2312     return;
2313 }
2314
2315 static inline void gmc_motion(MpegEncContext *s,
2316                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2317                                uint8_t **ref_picture)
2318 {
2319     uint8_t *ptr;
2320     int linesize, uvlinesize;
2321     const int a= s->sprite_warping_accuracy;
2322     int ox, oy;
2323
2324     linesize = s->linesize;
2325     uvlinesize = s->uvlinesize;
2326
2327     ptr = ref_picture[0];
2328
2329     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2330     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2331
2332     s->dsp.gmc(dest_y, ptr, linesize, 16,
2333            ox,
2334            oy,
2335            s->sprite_delta[0][0], s->sprite_delta[0][1],
2336            s->sprite_delta[1][0], s->sprite_delta[1][1],
2337            a+1, (1<<(2*a+1)) - s->no_rounding,
2338            s->h_edge_pos, s->v_edge_pos);
2339     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2340            ox + s->sprite_delta[0][0]*8,
2341            oy + s->sprite_delta[1][0]*8,
2342            s->sprite_delta[0][0], s->sprite_delta[0][1],
2343            s->sprite_delta[1][0], s->sprite_delta[1][1],
2344            a+1, (1<<(2*a+1)) - s->no_rounding,
2345            s->h_edge_pos, s->v_edge_pos);
2346
2347     if(s->flags&CODEC_FLAG_GRAY) return;
2348
2349     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2350     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2351
2352     ptr = ref_picture[1];
2353     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2354            ox,
2355            oy,
2356            s->sprite_delta[0][0], s->sprite_delta[0][1],
2357            s->sprite_delta[1][0], s->sprite_delta[1][1],
2358            a+1, (1<<(2*a+1)) - s->no_rounding,
2359            s->h_edge_pos>>1, s->v_edge_pos>>1);
2360
2361     ptr = ref_picture[2];
2362     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2363            ox,
2364            oy,
2365            s->sprite_delta[0][0], s->sprite_delta[0][1],
2366            s->sprite_delta[1][0], s->sprite_delta[1][1],
2367            a+1, (1<<(2*a+1)) - s->no_rounding,
2368            s->h_edge_pos>>1, s->v_edge_pos>>1);
2369 }
2370
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * The requested block may lie partly or completely outside the source area; the part
 * that overlaps the source is copied and the remainder is filled by repeating the
 * nearest copied row / column.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int row, col;
    int first_row, last_row, first_col, last_col;

    /* pull a block that lies completely outside back so that it overlaps
       the source by exactly one row / column */
    if (src_y >= h) {
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    } else if (src_y <= -block_h) {
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if (src_x >= w) {
        src  += (w - 1 - src_x);
        src_x = w - 1;
    } else if (src_x <= -block_w) {
        src  += (1 - block_w - src_x);
        src_x = 1 - block_w;
    }

    /* [first_row,last_row) x [first_col,last_col) is the part of the block
       that really exists in the source */
    first_row = FFMAX(0, -src_y);
    first_col = FFMAX(0, -src_x);
    last_row  = FFMIN(block_h, h - src_y);
    last_col  = FFMIN(block_w, w - src_x);

    /* copy the existing part */
    for (row = first_row; row < last_row; row++) {
        for (col = first_col; col < last_col; col++)
            buf[col + row * linesize] = src[col + row * linesize];
    }

    /* rows above the existing part repeat its first row */
    for (row = 0; row < first_row; row++) {
        for (col = first_col; col < last_col; col++)
            buf[col + row * linesize] = buf[col + first_row * linesize];
    }

    /* rows below the existing part repeat its last row */
    for (row = last_row; row < block_h; row++) {
        for (col = first_col; col < last_col; col++)
            buf[col + row * linesize] = buf[col + (last_row - 1) * linesize];
    }

    /* finally extend every row of the block to the left and to the right */
    for (row = 0; row < block_h; row++) {
        uint8_t *line = buf + row * linesize;

        for (col = 0; col < first_col; col++)
            line[col] = line[first_col];

        for (col = last_col; col < block_w; col++)
            line[col] = line[last_col - 1];
    }
}
2441
2442 static inline int hpel_motion(MpegEncContext *s,
2443                                   uint8_t *dest, uint8_t *src,
2444                                   int field_based, int field_select,
2445                                   int src_x, int src_y,
2446                                   int width, int height, int stride,
2447                                   int h_edge_pos, int v_edge_pos,
2448                                   int w, int h, op_pixels_func *pix_op,
2449                                   int motion_x, int motion_y)
2450 {
2451     int dxy;
2452     int emu=0;
2453
2454     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2455     src_x += motion_x >> 1;
2456     src_y += motion_y >> 1;
2457
2458     /* WARNING: do no forget half pels */
2459     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2460     if (src_x == width)
2461         dxy &= ~1;
2462     src_y = clip(src_y, -16, height);
2463     if (src_y == height)
2464         dxy &= ~2;
2465     src += src_y * stride + src_x;
2466
2467     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2468         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2469            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2470             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2471                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2472             src= s->edge_emu_buffer;
2473             emu=1;
2474         }
2475     }
2476     if(field_select)
2477         src += s->linesize;
2478     pix_op[dxy](dest, src, stride, h);
2479     return emu;
2480 }
2481
2482 /* apply one mpeg motion vector to the three components */
2483 static always_inline void mpeg_motion(MpegEncContext *s,
2484                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2485                                int field_based, int bottom_field, int field_select,
2486                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2487                                int motion_x, int motion_y, int h)
2488 {
2489     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2490     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2491
2492 #if 0
2493 if(s->quarter_sample)
2494 {
2495     motion_x>>=1;
2496     motion_y>>=1;
2497 }
2498 #endif
2499
2500     v_edge_pos = s->v_edge_pos >> field_based;
2501     linesize   = s->current_picture.linesize[0] << field_based;
2502     uvlinesize = s->current_picture.linesize[1] << field_based;
2503
2504     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2505     src_x = s->mb_x* 16               + (motion_x >> 1);
2506     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2507
2508     if (s->out_format == FMT_H263) {
2509         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2510             mx = (motion_x>>1)|(motion_x&1);
2511             my = motion_y >>1;
2512             uvdxy = ((my & 1) << 1) | (mx & 1);
2513             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2514             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2515         }else{
2516             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2517             uvsrc_x = src_x>>1;
2518             uvsrc_y = src_y>>1;
2519         }
2520     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
2521         mx = motion_x / 4;
2522         my = motion_y / 4;
2523         uvdxy = 0;
2524         uvsrc_x = s->mb_x*8 + mx;
2525         uvsrc_y = s->mb_y*8 + my;
2526     } else {
2527         if(s->chroma_y_shift){
2528             mx = motion_x / 2;
2529             my = motion_y / 2;
2530             uvdxy = ((my & 1) << 1) | (mx & 1);
2531             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2532             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2533         } else {
2534             if(s->chroma_x_shift){
2535             //Chroma422
2536                 mx = motion_x / 2;
2537                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
2538                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
2539                 uvsrc_y = src_y;
2540             } else {
2541             //Chroma444
2542                 uvdxy = dxy;
2543                 uvsrc_x = src_x;
2544                 uvsrc_y = src_y;
2545             }
2546         }
2547     }
2548
2549     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2550     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2551     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2552
2553     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
2554        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
2555             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
2556                s->codec_id == CODEC_ID_MPEG1VIDEO){
2557                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
2558                 return ;
2559             }
2560             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2561                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2562             ptr_y = s->edge_emu_buffer;
2563             if(!(s->flags&CODEC_FLAG_GRAY)){
2564                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2565                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
2566                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2567                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
2568                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2569                 ptr_cb= uvbuf;
2570                 ptr_cr= uvbuf+16;
2571             }
2572     }
2573
2574     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2575         dest_y += s->linesize;
2576         dest_cb+= s->uvlinesize;
2577         dest_cr+= s->uvlinesize;
2578     }
2579
2580     if(field_select){
2581         ptr_y += s->linesize;
2582         ptr_cb+= s->uvlinesize;
2583         ptr_cr+= s->uvlinesize;
2584     }
2585
2586     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
2587
2588     if(!(s->flags&CODEC_FLAG_GRAY)){
2589         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
2590         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
2591     }
2592 }
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Blends five 8x8 predictions into one 8x8 block with fixed,
 * position dependent weights (overlapped block motion compensation).
 * Every output sample is (weighted sum of the 5 inputs + 4) >> 3; the five
 * weights used at any position add up to 8.
 * @param dst destination block
 * @param src the five predictions: src[0]=mid, src[1]=top, src[2]=left,
 *            src[3]=right, src[4]=bottom
 * @param stride number of bytes between 2 vertically adjacent samples,
 *               used for dst and for every src[] block alike
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
/* one sample; the weights are given as (top, left, mid, right, bottom) */
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[(x)]= ((t)*top[(x)] + (l)*left[(x)] + (m)*mid[(x)] + (r)*right[(x)] + (b)*bottom[(x)] + 4)>>3
/* a 2x2 group of samples sharing the same weights */
#define OBMC_FILTER4(x, t, l, m, r, b)\
    do{\
        OBMC_FILTER((x)         , t, l, m, r, b);\
        OBMC_FILTER((x)+1       , t, l, m, r, b);\
        OBMC_FILTER((x)  +stride, t, l, m, r, b);\
        OBMC_FILTER((x)+1+stride, t, l, m, r, b);\
    }while(0)

    /* rows 0 and 1 (the OBMC_FILTER4 calls also cover columns 2..5 of row 1) */
    x=0;
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    x+= stride;
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    /* rows 2 and 3 */
    x+= stride;
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    /* rows 4 and 5 */
    x+= 2*stride;
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    /* rows 6 and 7 (the OBMC_FILTER4 calls also cover columns 2..5 of row 7) */
    x+= 2*stride;
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    x+= stride;
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);

/* the helpers only make sense inside this function, do not let them leak */
#undef OBMC_FILTER4
#undef OBMC_FILTER
}
2644
2645 /* obmc for 1 8x8 luma block */
2646 static inline void obmc_motion(MpegEncContext *s,
2647                                uint8_t *dest, uint8_t *src,
2648                                int src_x, int src_y,
2649                                op_pixels_func *pix_op,
2650                                int16_t mv[5][2]/* mid top left right bottom*/)
2651 #define MID    0
2652 {
2653     int i;
2654     uint8_t *ptr[5];
2655
2656     assert(s->quarter_sample==0);
2657
2658     for(i=0; i<5; i++){
2659         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
2660             ptr[i]= ptr[MID];
2661         }else{
2662             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
2663             hpel_motion(s, ptr[i], src, 0, 0,
2664                         src_x, src_y,
2665                         s->width, s->height, s->linesize,
2666                         s->h_edge_pos, s->v_edge_pos,
2667                         8, 8, pix_op,
2668                         mv[i][0], mv[i][1]);
2669         }
2670     }
2671
2672     put_obmc(dest, ptr, s->linesize);
2673 }
2674
2675 static inline void qpel_motion(MpegEncContext *s,
2676                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2677                                int field_based, int bottom_field, int field_select,
2678                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2679                                qpel_mc_func (*qpix_op)[16],
2680                                int motion_x, int motion_y, int h)
2681 {
2682     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2683     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
2684
2685     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2686     src_x = s->mb_x *  16                 + (motion_x >> 2);
2687     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
2688
2689     v_edge_pos = s->v_edge_pos >> field_based;
2690     linesize = s->linesize << field_based;
2691     uvlinesize = s->uvlinesize << field_based;
2692
2693     if(field_based){
2694         mx= motion_x/2;
2695         my= motion_y>>1;
2696     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
2697         static const int rtab[8]= {0,0,1,1,0,0,0,1};
2698         mx= (motion_x>>1) + rtab[motion_x&7];
2699         my= (motion_y>>1) + rtab[motion_y&7];
2700     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
2701         mx= (motion_x>>1)|(motion_x&1);
2702         my= (motion_y>>1)|(motion_y&1);
2703     }else{
2704         mx= motion_x/2;
2705         my= motion_y/2;
2706     }
2707     mx= (mx>>1)|(mx&1);
2708     my= (my>>1)|(my&1);
2709
2710     uvdxy= (mx&1) | ((my&1)<<1);
2711     mx>>=1;
2712     my>>=1;
2713
2714     uvsrc_x = s->mb_x *  8                 + mx;
2715     uvsrc_y = s->mb_y * (8 >> field_based) + my;
2716
2717     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
2718     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2719     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2720
2721     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
2722        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
2723         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2724                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2725         ptr_y= s->edge_emu_buffer;
2726         if(!(s->flags&CODEC_FLAG_GRAY)){
2727             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
2728             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
2729                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2730             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
2731                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2732             ptr_cb= uvbuf;
2733             ptr_cr= uvbuf + 16;
2734         }
2735     }
2736
2737     if(!field_based)
2738         qpix_op[0][dxy](dest_y, ptr_y, linesize);
2739     else{
2740         if(bottom_field){
2741             dest_y += s->linesize;
2742             dest_cb+= s->uvlinesize;
2743             dest_cr+= s->uvlinesize;
2744         }
2745
2746         if(field_select){
2747             ptr_y  += s->linesize;
2748             ptr_cb += s->uvlinesize;
2749             ptr_cr += s->uvlinesize;
2750         }
2751         //damn interlaced mode
2752         //FIXME boundary mirroring is not exactly correct here
2753         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
2754         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
2755     }
2756     if(!(s->flags&CODEC_FLAG_GRAY)){
2757         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
2758         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
2759     }
2760 }
2761
2762 inline int ff_h263_round_chroma(int x){
2763     if (x >= 0)
2764         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2765     else {
2766         x = -x;
2767         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2768     }
2769 }
2770
2771 /**
2772  * h263 chorma 4mv motion compensation.
2773  */
2774 static inline void chroma_4mv_motion(MpegEncContext *s,
2775                                      uint8_t *dest_cb, uint8_t *dest_cr,
2776                                      uint8_t **ref_picture,
2777                                      op_pixels_func *pix_op,
2778                                      int mx, int my){
2779     int dxy, emu=0, src_x, src_y, offset;
2780     uint8_t *ptr;
2781
2782     /* In case of 8X8, we construct a single chroma motion vector
2783        with a special rounding */
2784     mx= ff_h263_round_chroma(mx);
2785     my= ff_h263_round_chroma(my);
2786
2787     dxy = ((my & 1) << 1) | (mx & 1);
2788     mx >>= 1;
2789     my >>= 1;
2790
2791     src_x = s->mb_x * 8 + mx;
2792     src_y = s->mb_y * 8 + my;
2793     src_x = clip(src_x, -8, s->width/2);
2794     if (src_x == s->width/2)
2795         dxy &= ~1;
2796     src_y = clip(src_y, -8, s->height/2);
2797     if (src_y == s->height/2)
2798         dxy &= ~2;
2799
2800     offset = (src_y * (s->uvlinesize)) + src_x;
2801     ptr = ref_picture[1] + offset;
2802     if(s->flags&CODEC_FLAG_EMU_EDGE){
2803         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
2804            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
2805             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2806             ptr= s->edge_emu_buffer;
2807             emu=1;
2808         }
2809     }
2810     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
2811
2812     ptr = ref_picture[2] + offset;
2813     if(emu){
2814         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2815         ptr= s->edge_emu_buffer;
2816     }
2817     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
2818 }
2819
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 *
 * When s->obmc is set and the picture is not a B picture, the macroblock is
 * predicted with overlapped block motion compensation and the function
 * returns before the mv_type dispatch is reached.
 */
static inline void MPV_motion(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->obmc && s->pict_type != B_TYPE){
        /* 4x4 cache of vectors: rows/columns 1..2 hold the four 8x8 vectors of
           this macroblock, row 0 / row 3 / column 0 / column 3 hold the
           neighbouring vectors; the four corner entries are never read */
        int16_t mv_cache[4][4][2];
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
        const int mot_stride= s->b8_stride;
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;

        assert(!s->mb_skiped);

        /* each memcpy moves 4 int16_t, i.e. two (x,y) vectors, filling columns 1 and 2 of a row;
           row 3 (below) is filled from the same source as row 2 */
        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);

        /* row above: own top row if there is no usable (existing, non intra) macroblock above */
        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
        }else{
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
        }

        /* left column: one (x,y) vector is copied per 32 bit access */
        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
        }else{
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
        }

        /* right column */
        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
        }else{
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
        }

        /* predict the four 8x8 luma blocks and sum their own vectors for chroma */
        mx = 0;
        my = 0;
        for(i=0;i<4;i++) {
            const int x= (i&1)+1;
            const int y= (i>>1)+1;
            /* order expected by obmc_motion(): mid, top, left, right, bottom */
            int16_t mv[5][2]= {
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
            //FIXME cleanup
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                        pix_op[1],
                        mv);

            mx += mv[0][0];
            my += mv[0][1];
        }
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);

        return;
    }

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        /* one vector for the whole macroblock; gmc, qpel and mspel are only compiled in with CONFIG_RISKY */
#ifdef CONFIG_RISKY
        if(s->mcsel){
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else if(s->mspel){
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else
#endif
        {
            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_8X8:
        /* four luma vectors; mx/my accumulate them for the single chroma vector */
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;

                /* WARNING: do not forget half pels */
                /* a coordinate clipped to the upper limit loses its fractional bits */
                src_x = clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;

                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                /* the quarter-pel components are halved before being summed */
                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                            ref_picture[0], 0, 0,
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos, s->v_edge_pos,
                            8, 8, pix_op[1],
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* frame picture: each field is predicted separately with 8 lines */
            if(s->quarter_sample){
                for(i=0; i<2; i++){
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
                }
            }else{
                /* top field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 0, s->field_select[dir][0],
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 1, s->field_select[dir][1],
                            ref_picture, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        } else {
            /* field picture: under this condition the reference is the picture currently being decoded */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_16X8:
        /* two vectors, one per half of the macroblock; the reference is chosen per half */
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);

            /* advance the destination pointers for the second half */
            dest_y += 16*s->linesize;
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                }
                /* the second pass uses the averaging functions on top of the first pass */
                pix_op = s->dsp.avg_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);

                // after put we make avg of the same block
                pix_op=s->dsp.avg_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3070
3071
3072 /* put block[] to dest[] */
3073 static inline void put_dct(MpegEncContext *s,
3074                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3075 {
3076     s->dct_unquantize_intra(s, block, i, qscale);
3077     s->dsp.idct_put (dest, line_size, block);
3078 }
3079
3080 /* add block[] to dest[] */
3081 static inline void add_dct(MpegEncContext *s,
3082                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3083 {
3084     if (s->block_last_index[i] >= 0) {
3085         s->dsp.idct_add (dest, line_size, block);
3086     }
3087 }
3088
3089 static inline void add_dequant_dct(MpegEncContext *s,
3090                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3091 {
3092     if (s->block_last_index[i] >= 0) {
3093         s->dct_unquantize_inter(s, block, i, qscale);
3094
3095         s->dsp.idct_add (dest, line_size, block);
3096     }
3097 }
3098
3099 /**
3100  * cleans dc, ac, coded_block for the current non intra MB
3101  */
3102 void ff_clean_intra_table_entries(MpegEncContext *s)
3103 {
3104     int wrap = s->b8_stride;
3105     int xy = s->block_index[0];
3106
3107     s->dc_val[0][xy           ] =
3108     s->dc_val[0][xy + 1       ] =
3109     s->dc_val[0][xy     + wrap] =
3110     s->dc_val[0][xy + 1 + wrap] = 1024;
3111     /* ac pred */
3112     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3113     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3114     if (s->msmpeg4_version>=3) {
3115         s->coded_block[xy           ] =
3116         s->coded_block[xy + 1       ] =
3117         s->coded_block[xy     + wrap] =
3118         s->coded_block[xy + 1 + wrap] = 0;
3119     }
3120     /* chroma */
3121     wrap = s->mb_stride;
3122     xy = s->mb_x + s->mb_y * wrap;
3123     s->dc_val[1][xy] =
3124     s->dc_val[2][xy] = 1024;
3125     /* ac pred */
3126     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3127     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3128
3129     s->mbintra_table[xy]= 0;
3130 }
3131
3132 /* generic function called after a macroblock has been parsed by the
3133    decoder or after it has been encoded by the encoder.
3134
3135    Important variables used:
3136    s->mb_intra : true if intra macroblock
3137    s->mv_dir   : motion vector direction
3138    s->mv_type  : motion vector type
3139    s->mv       : motion vector
3140    s->interlaced_dct : true if interlaced dct used (mpeg2)
3141  */
3142 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64])
3143 {
3144     int mb_x, mb_y;
3145     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3146 #ifdef HAVE_XVMC
3147     if(s->avctx->xvmc_acceleration){
3148         XVMC_decode_mb(s);//xvmc uses pblocks
3149         return;
3150     }
3151 #endif
3152
3153     mb_x = s->mb_x;
3154     mb_y = s->mb_y;
3155
3156     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3157        /* save DCT coefficients */
3158        int i,j;
3159        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3160        for(i=0; i<6; i++)
3161            for(j=0; j<64; j++)
3162                *dct++ = block[i][s->dsp.idct_permutation[j]];
3163     }
3164
3165     s->current_picture.qscale_table[mb_xy]= s->qscale;
3166
3167     /* update DC predictors for P macroblocks */
3168     if (!s->mb_intra) {
3169         if (s->h263_pred || s->h263_aic) {
3170             if(s->mbintra_table[mb_xy])
3171                 ff_clean_intra_table_entries(s);
3172         } else {
3173             s->last_dc[0] =
3174             s->last_dc[1] =
3175             s->last_dc[2] = 128 << s->intra_dc_precision;
3176         }
3177     }
3178     else if (s->h263_pred || s->h263_aic)
3179         s->mbintra_table[mb_xy]=1;
3180
3181     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3182         uint8_t *dest_y, *dest_cb, *dest_cr;
3183         int dct_linesize, dct_offset;
3184         op_pixels_func (*op_pix)[4];
3185         qpel_mc_func (*op_qpix)[16];
3186         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this woulnd be wrong for field pics
3187         const int uvlinesize= s->current_picture.linesize[1];
3188         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band;
3189
3190         /* avoid copy if macroblock skipped in last frame too */
3191         /* skip only during decoding as we might trash the buffers during encoding a bit */
3192         if(!s->encoding){
3193             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3194             const int age= s->current_picture.age;
3195
3196             assert(age);
3197
3198             if (s->mb_skiped) {
3199                 s->mb_skiped= 0;
3200                 assert(s->pict_type!=I_TYPE);
3201
3202                 (*mbskip_ptr) ++; /* indicate that this time we skiped it */
3203                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3204
3205                 /* if previous was skipped too, then nothing to do !  */
3206                 if (*mbskip_ptr >= age && s->current_picture.reference){
3207                     return;
3208                 }
3209             } else if(!s->current_picture.reference){
3210                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3211                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3212             } else{
3213                 *mbskip_ptr = 0; /* not skipped */
3214             }
3215         }
3216
3217         dct_linesize = linesize << s->interlaced_dct;
3218         dct_offset =(s->interlaced_dct)? linesize : linesize*8;
3219
3220         if(readable){
3221             dest_y=  s->dest[0];
3222             dest_cb= s->dest[1];
3223             dest_cr= s->dest[2];
3224         }else{
3225             dest_y = s->b_scratchpad;
3226             dest_cb= s->b_scratchpad+16*linesize;
3227             dest_cr= s->b_scratchpad+32*linesize;
3228         }
3229         if (!s->mb_intra) {
3230             /* motion handling */
3231             /* decoding or more than one mb_type (MC was allready done otherwise) */
3232             if(!s->encoding){
3233                 if ((!s->no_rounding) || s->pict_type==B_TYPE){
3234                     op_pix = s->dsp.put_pixels_tab;
3235                     op_qpix= s->dsp.put_qpel_pixels_tab;
3236                 }else{
3237                     op_pix = s->dsp.put_no_rnd_pixels_tab;
3238                     op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3239                 }
3240
3241                 if (s->mv_dir & MV_DIR_FORWARD) {
3242                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3243                     op_pix = s->dsp.avg_pixels_tab;
3244                     op_qpix= s->dsp.avg_qpel_pixels_tab;
3245                 }
3246                 if (s->mv_dir & MV_DIR_BACKWARD) {
3247                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3248                 }
3249             }
3250
3251             /* skip dequant / idct if we are really late ;) */
3252             if(s->hurry_up>1) return;
3253
3254             /* add dct residue */
3255             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3256                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3257                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize, s->qscale);
3258                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize, s->qscale);
3259                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize, s->qscale);
3260                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize, s->qscale);
3261
3262                 if(!(s->flags&CODEC_FLAG_GRAY)){
3263                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3264                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3265                 }
3266             } else if(s->codec_id != CODEC_ID_WMV2){
3267                 add_dct(s, block[0], 0, dest_y, dct_linesize);
3268                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
3269                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
3270                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
3271
3272                 if(!(s->flags&CODEC_FLAG_GRAY)){
3273                     if(s->chroma_y_shift){//Chroma420
3274                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3275                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
3276                     }else{
3277                         //chroma422
3278                         dct_linesize = uvlinesize << s->interlaced_dct;
3279                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3280
3281                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
3282                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
3283                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
3284                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
3285                         if(!s->chroma_x_shift){//Chroma444
3286                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
3287                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
3288                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
3289                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
3290                         }
3291                     }
3292                 }//fi gray
3293             }
3294 #ifdef CONFIG_RISKY
3295             else{
3296                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
3297             }
3298 #endif
3299         } else {
3300             /* dct only in intra block */
3301             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
3302                 put_dct(s, block[0], 0, dest_y, dct_linesize, s->qscale);
3303                 put_dct(s, block[1], 1, dest_y + 8, dct_linesize, s->qscale);
3304                 put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize, s->qscale);
3305                 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize, s->qscale);
3306
3307                 if(!(s->flags&CODEC_FLAG_GRAY)){
3308                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3309                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3310                 }
3311             }else{
3312                 s->dsp.idct_put(dest_y                 , dct_linesize, block[0]);
3313                 s->dsp.idct_put(dest_y              + 8, dct_linesize, block[1]);
3314                 s->dsp.idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
3315                 s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
3316
3317                 if(!(s->flags&CODEC_FLAG_GRAY)){
3318                     if(s->chroma_y_shift){
3319                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
3320                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
3321                     }else{
3322
3323                         dct_linesize = uvlinesize << s->interlaced_dct;
3324                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3325
3326                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
3327                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
3328                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
3329                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
3330                         if(!s->chroma_x_shift){//Chroma444
3331                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
3332                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
3333                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
3334                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
3335                         }
3336                     }
3337                 }//gray
3338             }
3339         }
3340         if(!readable){
3341             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
3342             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
3343             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
3344         }
3345     }
3346 }
3347
3348 #ifdef CONFIG_ENCODERS
3349
3350 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
3351 {
3352     static const char tab[64]=
3353         {3,2,2,1,1,1,1,1,
3354          1,1,1,1,1,1,1,1,
3355          1,1,1,1,1,1,1,1,
3356          0,0,0,0,0,0,0,0,
3357          0,0,0,0,0,0,0,0,
3358          0,0,0,0,0,0,0,0,
3359          0,0,0,0,0,0,0,0,
3360          0,0,0,0,0,0,0,0};
3361     int score=0;
3362     int run=0;
3363     int i;
3364     DCTELEM *block= s->block[n];
3365     const int last_index= s->block_last_index[n];
3366     int skip_dc;
3367
3368     if(threshold<0){
3369         skip_dc=0;
3370         threshold= -threshold;
3371     }else
3372         skip_dc=1;
3373
3374     /* are all which we could set to zero are allready zero? */
3375     if(last_index<=skip_dc - 1) return;
3376
3377     for(i=0; i<=last_index; i++){
3378         const int j = s->intra_scantable.permutated[i];
3379         const int level = ABS(block[j]);
3380         if(level==1){
3381             if(skip_dc && i==0) continue;
3382             score+= tab[run];
3383             run=0;
3384         }else if(level>1){
3385             return;
3386         }else{
3387             run++;
3388         }
3389     }
3390     if(score >= threshold) return;
3391     for(i=skip_dc; i<=last_index; i++){
3392         const int j = s->intra_scantable.permutated[i];
3393         block[j]=0;
3394     }
3395     if(block[0]) s->block_last_index[n]= 0;
3396     else         s->block_last_index[n]= -1;
3397 }
3398
3399 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
3400 {
3401     int i;
3402     const int maxlevel= s->max_qcoeff;
3403     const int minlevel= s->min_qcoeff;
3404     int overflow=0;
3405
3406     if(s->mb_intra){
3407         i=1; //skip clipping of intra dc
3408     }else
3409         i=0;
3410
3411     for(;i<=last_index; i++){
3412         const int j= s->intra_scantable.permutated[i];
3413         int level = block[j];
3414
3415         if     (level>maxlevel){
3416             level=maxlevel;
3417             overflow++;
3418         }else if(level<minlevel){
3419             level=minlevel;
3420             overflow++;
3421         }
3422
3423         block[j]= level;
3424     }
3425
3426     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
3427         av_log(s->avctx, AV_LOG_INFO, "warning, cliping %d dct coefficents to %d..%d\n", overflow, minlevel, maxlevel);
3428 }
3429
3430 #endif //CONFIG_ENCODERS
3431
3432 /**
3433  *
3434  * @param h is the normal height, this will be reduced automatically if needed for the last row
3435  */
3436 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
3437     if (s->avctx->draw_horiz_band) {
3438         AVFrame *src;
3439         int offset[4];
3440
3441         if(s->picture_structure != PICT_FRAME){
3442             h <<= 1;
3443             y <<= 1;
3444             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
3445         }
3446
3447         h= FFMIN(h, s->height - y);
3448
3449         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
3450             src= (AVFrame*)s->current_picture_ptr;
3451         else if(s->last_picture_ptr)
3452             src= (AVFrame*)s->last_picture_ptr;
3453         else
3454             return;
3455
3456         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
3457             offset[0]=
3458             offset[1]=
3459             offset[2]=
3460             offset[3]= 0;
3461         }else{
3462             offset[0]= y * s->linesize;;
3463             offset[1]=
3464             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
3465             offset[3]= 0;
3466         }
3467
3468         emms_c();
3469
3470         s->avctx->draw_horiz_band(s->avctx, src, offset,
3471                                   y, s->picture_structure, h);
3472     }
3473 }
3474
3475 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
3476     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this woulnd be wrong for field pics
3477     const int uvlinesize= s->current_picture.linesize[1];
3478
3479     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
3480     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
3481     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
3482     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
3483     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
3484     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
3485     //block_index is not used by mpeg2, so it is not affected by chroma_format
3486
3487     s->dest[0] = s->current_picture.data[0] + (s->mb_x - 1)*16;
3488     s->dest[1] = s->current_picture.data[1] + (s->mb_x - 1)*(16 >> s->chroma_x_shift);
3489     s->dest[2] = s->current_picture.data[2] + (s->mb_x - 1)*(16 >> s->chroma_x_shift);
3490
3491     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
3492     {
3493         s->dest[0] += s->mb_y *   linesize * 16;
3494         s->dest[1] += s->mb_y * uvlinesize * (16 >> s->chroma_y_shift);
3495         s->dest[2] += s->mb_y * uvlinesize * (16 >> s->chroma_y_shift);
3496     }
3497 }
3498
3499 #ifdef CONFIG_ENCODERS
3500
/**
 * Computes a weight for every pixel of an 8x8 block from the pixel
 * values in its 3x3 neighbourhood (clipped to the block).
 *
 * With n pixels in the neighbourhood, sum their sum and sqr the sum of
 * their squares, the weight is 36*ff_sqrt(n*sqr - sum*sum)/n.
 *
 * @param weight output, 64 values stored row by row
 * @param ptr    top left pixel of the block
 * @param stride distance between two rows of ptr
 */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int x, y;
//FIXME optimize
    for(y=0; y<8; y++){
        const int top   = FFMAX(y-1, 0);
        const int bottom= FFMIN(8, y+2);

        for(x=0; x<8; x++){
            const int left = FFMAX(x-1, 0);
            const int right= FFMIN(8, x+2);
            const int count= (bottom - top)*(right - left);
            int sum=0;
            int sqr=0;
            int nx, ny;

            for(ny=top; ny<bottom; ny++){
                const uint8_t *row= ptr + ny*stride;

                for(nx=left; nx<right; nx++){
                    const int v= row[nx];
                    sum += v;
                    sqr += v*v;
                }
            }
            weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
        }
    }
}
3523
3524 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
3525 {
3526     int16_t weight[6][64];
3527     DCTELEM orig[6][64];
3528     const int mb_x= s->mb_x;
3529     const int mb_y= s->mb_y;
3530     int i;
3531     int skip_dct[6];
3532     int dct_offset   = s->linesize*8; //default for progressive frames
3533     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3534     int wrap_y, wrap_c;
3535
3536     for(i=0; i<6; i++) skip_dct[i]=0;
3537
3538     if(s->adaptive_quant){
3539         const int last_qp= s->qscale;
3540         const int mb_xy= mb_x + mb_y*s->mb_stride;
3541
3542         s->lambda= s->lambda_table[mb_xy];
3543         update_qscale(s);
3544
3545         if(!(s->flags&CODEC_FLAG_QP_RD)){
3546             s->dquant= s->qscale - last_qp;
3547
3548             if(s->out_format==FMT_H263){
3549                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
3550
3551                 if(s->codec_id==CODEC_ID_MPEG4){
3552                     if(!s->mb_intra){
3553                         if(s->pict_type == B_TYPE){
3554                             if(s->dquant&1)
3555                                 s->dquant= (s->dquant/2)*2;
3556                             if(s->mv_dir&MV_DIRECT)
3557                                 s->dquant= 0;
3558                         }
3559                         if(s->mv_type==MV_TYPE_8X8)
3560                             s->dquant=0;
3561                     }
3562                 }
3563             }
3564         }
3565         ff_set_qscale(s, last_qp + s->dquant);
3566     }else if(s->flags&CODEC_FLAG_QP_RD)
3567         ff_set_qscale(s, s->qscale + s->dquant);
3568
3569     wrap_y = s->linesize;
3570     wrap_c = s->uvlinesize;
3571     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
3572     ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
3573     ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
3574
3575     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
3576         ff_emulated_edge_mc(s->edge_emu_buffer            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
3577         ptr_y= s->edge_emu_buffer;
3578         ff_emulated_edge_mc(s->edge_emu_buffer+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
3579         ptr_cb= s->edge_emu_buffer+18*wrap_y;
3580         ff_emulated_edge_mc(s->edge_emu_buffer+18*wrap_y+9, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
3581         ptr_cr= s->edge_emu_buffer+18*wrap_y+9;
3582     }
3583
3584     if (s->mb_intra) {
3585         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
3586             int progressive_score, interlaced_score;
3587
3588             s->interlaced_dct=0;
3589             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
3590                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
3591
3592             if(progressive_score > 0){
3593                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
3594                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
3595                 if(progressive_score > interlaced_score){
3596                     s->interlaced_dct=1;
3597
3598                     dct_offset= wrap_y;
3599                     wrap_y<<=1;
3600                 }
3601             }
3602         }
3603
3604         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
3605         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
3606         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
3607         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
3608
3609         if(s->flags&CODEC_FLAG_GRAY){
3610             skip_dct[4]= 1;
3611             skip_dct[5]= 1;
3612         }else{
3613             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
3614             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
3615         }
3616     }else{
3617         op_pixels_func (*op_pix)[4];
3618         qpel_mc_func (*op_qpix)[16];
3619         uint8_t *dest_y, *dest_cb, *dest_cr;
3620
3621         dest_y  = s->dest[0];
3622         dest_cb = s->dest[1];
3623         dest_cr = s->dest[2];
3624
3625         if ((!s->no_rounding) || s->pict_type==B_TYPE){
3626             op_pix = s->dsp.put_pixels_tab;
3627             op_qpix= s->dsp.put_qpel_pixels_tab;
3628         }else{
3629             op_pix = s->dsp.put_no_rnd_pixels_tab;
3630             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3631         }
3632
3633         if (s->mv_dir & MV_DIR_FORWARD) {
3634             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3635             op_pix = s->dsp.avg_pixels_tab;
3636             op_qpix= s->dsp.avg_qpel_pixels_tab;
3637         }
3638         if (s->mv_dir & MV_DIR_BACKWARD) {
3639             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3640         }
3641
3642         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
3643             int progressive_score, interlaced_score;
3644
3645             s->interlaced_dct=0;
3646             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
3647                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
3648
3649             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
3650
3651             if(progressive_score>0){
3652                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
3653                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
3654
3655                 if(progressive_score > interlaced_score){
3656                     s->interlaced_dct=1;
3657
3658                     dct_offset= wrap_y;
3659                     wrap_y<<=1;
3660                 }
3661             }
3662         }
3663
3664         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
3665         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
3666         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
3667         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
3668
3669         if(s->flags&CODEC_FLAG_GRAY){
3670             skip_dct[4]= 1;
3671             skip_dct[5]= 1;
3672         }else{
3673             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
3674             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
3675         }
3676         /* pre quantization */
3677         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
3678             //FIXME optimize
3679             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
3680             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
3681             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
3682             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
3683             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
3684             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
3685         }
3686     }
3687
3688     if(s->avctx->quantizer_noise_shaping){
3689         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
3690         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
3691         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
3692         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
3693         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
3694         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
3695         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
3696     }
3697
3698     /* DCT & quantize */
3699     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
3700     {
3701         for(i=0;i<6;i++) {
3702             if(!skip_dct[i]){
3703                 int overflow;
3704                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
3705             // FIXME we could decide to change to quantizer instead of clipping
3706             // JS: I don't think that would be a good idea it could lower quality instead
3707             //     of improve it. Just INTRADC clipping deserves changes in quantizer
3708                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
3709             }else
3710                 s->block_last_index[i]= -1;
3711         }
3712         if(s->avctx->quantizer_noise_shaping){
3713             for(i=0;i<6;i++) {
3714                 if(!skip_dct[i]){
3715                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
3716                 }
3717             }
3718         }
3719
3720         if(s->luma_elim_threshold && !s->mb_intra)
3721             for(i=0; i<4; i++)
3722                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
3723         if(s->chroma_elim_threshold && !s->mb_intra)
3724             for(i=4; i<6; i++)
3725                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
3726
3727         if(s->flags & CODEC_FLAG_CBP_RD){
3728             for(i=0;i<6;i++) {
3729                 if(s->block_last_index[i] == -1)
3730                     s->coded_score[i]= INT_MAX/256;
3731             }
3732         }
3733     }
3734
3735     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
3736         s->block_last_index[4]=
3737         s->block_last_index[5]= 0;
3738         s->block[4][0]=
3739         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
3740     }
3741
3742     //non c quantize code returns incorrect block_last_index FIXME
3743     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
3744         for(i=0; i<6; i++){
3745             int j;
3746             if(s->block_last_index[i]>0){
3747                 for(j=63; j>0; j--){
3748                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
3749                 }
3750                 s->block_last_index[i]= j;
3751             }
3752         }
3753     }
3754
3755     /* huffman encode */
3756     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
3757     case CODEC_ID_MPEG1VIDEO:
3758     case CODEC_ID_MPEG2VIDEO:
3759         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
3760 #ifdef CONFIG_RISKY
3761     case CODEC_ID_MPEG4:
3762         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3763     case CODEC_ID_MSMPEG4V2:
3764     case CODEC_ID_MSMPEG4V3:
3765     case CODEC_ID_WMV1:
3766         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3767     case CODEC_ID_WMV2:
3768          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
3769     case CODEC_ID_H263:
3770     case CODEC_ID_H263P:
3771     case CODEC_ID_FLV1:
3772     case CODEC_ID_RV10:
3773         h263_encode_mb(s, s->block, motion_x, motion_y); break;
3774 #endif
3775     case CODEC_ID_MJPEG:
3776         mjpeg_encode_mb(s, s->block); break;
3777     default:
3778         assert(0);
3779     }
3780 }
3781
3782 #endif //CONFIG_ENCODERS
3783
3784 void ff_mpeg_flush(AVCodecContext *avctx){
3785     int i;
3786     MpegEncContext *s = avctx->priv_data;
3787
3788     if(s==NULL || s->picture==NULL)
3789         return;
3790
3791     for(i=0; i<MAX_PICTURE_COUNT; i++){
3792        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
3793                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
3794         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
3795     }
3796     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
3797
3798     s->parse_context.state= -1;
3799     s->parse_context.frame_start_found= 0;
3800     s->parse_context.overread= 0;
3801     s->parse_context.overread_index= 0;
3802     s->parse_context.index= 0;
3803     s->parse_context.last_index= 0;
3804     s->bitstream_buffer_size=0;
3805 }
3806
3807 #ifdef CONFIG_ENCODERS
3808 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
3809 {
3810     const uint16_t *srcw= (uint16_t*)src;
3811     int words= length>>4;
3812     int bits= length&15;
3813     int i;
3814
3815     if(length==0) return;
3816
3817     if(words < 16){
3818         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
3819     }else if(put_bits_count(pb)&7){
3820         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
3821     }else{
3822         for(i=0; put_bits_count(pb)&31; i++)
3823             put_bits(pb, 8, src[i]);
3824         flush_put_bits(pb);
3825         memcpy(pbBufPtr(pb), src+i, 2*words-i);
3826         skip_put_bytes(pb, 2*words-i);
3827     }
3828
3829     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
3830 }
3831
3832 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
3833     int i;
3834
3835     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
3836
3837     /* mpeg1 */
3838     d->mb_skip_run= s->mb_skip_run;
3839     for(i=0; i<3; i++)
3840         d->last_dc[i]= s->last_dc[i];
3841
3842     /* statistics */
3843     d->mv_bits= s->mv_bits;
3844     d->i_tex_bits= s->i_tex_bits;
3845     d->p_tex_bits= s->p_tex_bits;
3846     d->i_count= s->i_count;
3847     d->f_count= s->f_count;
3848     d->b_count= s->b_count;
3849     d->skip_count= s->skip_count;
3850     d->misc_bits= s->misc_bits;
3851     d->last_bits= 0;
3852
3853     d->mb_skiped= 0;
3854     d->qscale= s->qscale;
3855     d->dquant= s->dquant;
3856 }
3857
3858 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
3859     int i;
3860
3861     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
3862     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
3863
3864     /* mpeg1 */
3865     d->mb_skip_run= s->mb_skip_run;
3866     for(i=0; i<3; i++)
3867         d->last_dc[i]= s->last_dc[i];
3868
3869     /* statistics */
3870     d->mv_bits= s->mv_bits;
3871     d->i_tex_bits= s->i_tex_bits;
3872     d->p_tex_bits= s->p_tex_bits;
3873     d->i_count= s->i_count;
3874     d->f_count= s->f_count;
3875     d->b_count= s->b_count;
3876     d->skip_count= s->skip_count;
3877     d->misc_bits= s->misc_bits;
3878
3879     d->mb_intra= s->mb_intra;
3880     d->mb_skiped= s->mb_skiped;
3881     d->mv_type= s->mv_type;
3882     d->mv_dir= s->mv_dir;
3883     d->pb= s->pb;
3884     if(s->data_partitioning){
3885         d->pb2= s->pb2;
3886         d->tex_pb= s->tex_pb;
3887     }
3888     d->block= s->block;
3889     for(i=0; i<6; i++)
3890         d->block_last_index[i]= s->block_last_index[i];
3891     d->interlaced_dct= s->interlaced_dct;
3892     d->qscale= s->qscale;
3893 }
3894
3895 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
3896                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
3897                            int *dmin, int *next_block, int motion_x, int motion_y)
3898 {
3899     int score;
3900     uint8_t *dest_backup[3];
3901
3902     copy_context_before_encode(s, backup, type);
3903
3904     s->block= s->blocks[*next_block];
3905     s->pb= pb[*next_block];
3906     if(s->data_partitioning){
3907         s->pb2   = pb2   [*next_block];
3908         s->tex_pb= tex_pb[*next_block];
3909     }
3910
3911     if(*next_block){
3912         memcpy(dest_backup, s->dest, sizeof(s->dest));
3913         s->dest[0] = s->rd_scratchpad;
3914         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
3915         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
3916         assert(s->linesize >= 32); //FIXME
3917     }
3918
3919     encode_mb(s, motion_x, motion_y);
3920
3921     score= put_bits_count(&s->pb);
3922     if(s->data_partitioning){
3923         score+= put_bits_count(&s->pb2);
3924         score+= put_bits_count(&s->tex_pb);
3925     }
3926
3927     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
3928         MPV_decode_mb(s, s->block);
3929
3930         score *= s->lambda2;
3931         score += sse_mb(s) << FF_LAMBDA_SHIFT;
3932     }
3933
3934     if(*next_block){
3935         memcpy(s->dest, dest_backup, sizeof(s->dest));
3936     }
3937
3938     if(score<*dmin){
3939         *dmin= score;
3940         *next_block^=1;
3941
3942         copy_context_after_encode(best, s, type);
3943     }
3944 }
3945
3946 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
3947     uint32_t *sq = squareTbl + 256;
3948     int acc=0;
3949     int x,y;
3950
3951     if(w==16 && h==16)
3952         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
3953     else if(w==8 && h==8)
3954         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
3955
3956     for(y=0; y<h; y++){
3957         for(x=0; x<w; x++){
3958             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
3959         }
3960     }
3961
3962     assert(acc>=0);
3963
3964     return acc;
3965 }
3966
3967 static int sse_mb(MpegEncContext *s){
3968     int w= 16;
3969     int h= 16;
3970
3971     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3972     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3973
3974     if(w==16 && h==16)
3975       if(s->avctx->mb_cmp == FF_CMP_NSSE){
3976         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
3977                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
3978                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
3979       }else{
3980         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
3981                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
3982                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
3983       }
3984     else
3985         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
3986                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
3987                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
3988 }
3989
3990 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
3991     MpegEncContext *s= arg;
3992
3993
3994     s->me.pre_pass=1;
3995     s->me.dia_size= s->avctx->pre_dia_size;
3996     s->first_slice_line=1;
3997     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
3998         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
3999             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4000         }
4001         s->first_slice_line=0;
4002     }
4003
4004     s->me.pre_pass=0;
4005
4006     return 0;
4007 }
4008
4009 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4010     MpegEncContext *s= arg;
4011
4012     s->me.dia_size= s->avctx->dia_size;
4013     s->first_slice_line=1;
4014     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4015         s->mb_x=0; //for block init below
4016         ff_init_block_index(s);
4017         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4018             s->block_index[0]+=2;
4019             s->block_index[1]+=2;
4020             s->block_index[2]+=2;
4021             s->block_index[3]+=2;
4022
4023             /* compute motion vector & mb_type and store in context */
4024             if(s->pict_type==B_TYPE)
4025                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4026             else
4027                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4028         }
4029         s->first_slice_line=0;
4030     }
4031     return 0;
4032 }
4033
4034 static int mb_var_thread(AVCodecContext *c, void *arg){
4035     MpegEncContext *s= arg;
4036     int mb_x, mb_y;
4037
4038     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4039         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4040             int xx = mb_x * 16;
4041             int yy = mb_y * 16;
4042             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4043             int varc;
4044             int sum = s->dsp.pix_sum(pix, s->linesize);
4045
4046             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4047
4048             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4049             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4050             s->me.mb_var_sum_temp    += varc;
4051         }
4052     }
4053     return 0;
4054 }
4055
4056 static void write_slice_end(MpegEncContext *s){
4057     if(s->codec_id==CODEC_ID_MPEG4){
4058         if(s->partitioned_frame){
4059             ff_mpeg4_merge_partitions(s);
4060         }
4061
4062         ff_mpeg4_stuffing(&s->pb);
4063     }else if(s->out_format == FMT_MJPEG){
4064         ff_mjpeg_stuffing(&s->pb);
4065     }
4066
4067     align_put_bits(&s->pb);
4068     flush_put_bits(&s->pb);
4069 }
4070
4071 static int encode_thread(AVCodecContext *c, void *arg){
4072     MpegEncContext *s= arg;
4073     int mb_x, mb_y, pdif = 0;
4074     int i, j;
4075     MpegEncContext best_s, backup_s;
4076     uint8_t bit_buf[2][3000];
4077     uint8_t bit_buf2[2][3000];
4078     uint8_t bit_buf_tex[2][3000];
4079     PutBitContext pb[2], pb2[2], tex_pb[2];
4080 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4081
4082     for(i=0; i<2; i++){
4083         init_put_bits(&pb    [i], bit_buf    [i], 3000);
4084         init_put_bits(&pb2   [i], bit_buf2   [i], 3000);
4085         init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000);
4086     }
4087
4088     s->last_bits= put_bits_count(&s->pb);
4089     s->mv_bits=0;
4090     s->misc_bits=0;
4091     s->i_tex_bits=0;
4092     s->p_tex_bits=0;
4093     s->i_count=0;
4094     s->f_count=0;
4095     s->b_count=0;
4096     s->skip_count=0;
4097
4098     for(i=0; i<3; i++){
4099         /* init last dc values */
4100         /* note: quant matrix value (8) is implied here */
4101         s->last_dc[i] = 128 << s->intra_dc_precision;
4102
4103         s->current_picture_ptr->error[i] = 0;
4104     }
4105     s->mb_skip_run = 0;
4106     memset(s->last_mv, 0, sizeof(s->last_mv));
4107
4108     s->last_mv_dir = 0;
4109
4110 #ifdef CONFIG_RISKY
4111     switch(s->codec_id){
4112     case CODEC_ID_H263:
4113     case CODEC_ID_H263P:
4114     case CODEC_ID_FLV1:
4115         s->gob_index = ff_h263_get_gob_height(s);
4116         break;
4117     case CODEC_ID_MPEG4:
4118         if(s->partitioned_frame)
4119             ff_mpeg4_init_partitions(s);
4120         break;
4121     }
4122 #endif
4123
4124     s->resync_mb_x=0;
4125     s->resync_mb_y=0;
4126     s->first_slice_line = 1;
4127     s->ptr_lastgob = s->pb.buf;
4128     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4129 //    printf("row %d at %X\n", s->mb_y, (int)s);
4130         s->mb_x=0;
4131         s->mb_y= mb_y;
4132
4133         ff_set_qscale(s, s->qscale);
4134         ff_init_block_index(s);
4135
4136         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4137             const int xy= mb_y*s->mb_stride + mb_x;
4138             int mb_type= s->mb_type[xy];
4139 //            int d;
4140             int dmin= INT_MAX;
4141             int dir;
4142
4143             s->mb_x = mb_x;
4144             ff_update_block_index(s);
4145
4146             /* write gob / video packet header  */
4147 #ifdef CONFIG_RISKY
4148             if(s->rtp_mode){
4149                 int current_packet_size, is_gob_start;
4150
4151                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4152
4153                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4154
4155                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4156
4157                 switch(s->codec_id){
4158                 case CODEC_ID_H263:
4159                 case CODEC_ID_H263P:
4160                     if(!s->h263_slice_structured)
4161                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4162                     break;
4163                 case CODEC_ID_MPEG2VIDEO:
4164                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4165                 case CODEC_ID_MPEG1VIDEO:
4166                     if(s->mb_skip_run) is_gob_start=0;
4167                     break;
4168                 }
4169
4170                 if(is_gob_start){
4171                     if(s->start_mb_y != mb_y || mb_x!=0){
4172                         write_slice_end(s);
4173
4174                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4175                             ff_mpeg4_init_partitions(s);
4176                         }
4177                     }
4178
4179                     assert((put_bits_count(&s->pb)&7) == 0);
4180                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4181
4182                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4183                         int r= put_bits_count(&s->pb)/8 + s->picture_number + s->codec_id + s->mb_x + s->mb_y;
4184                         int d= 100 / s->avctx->error_rate;
4185                         if(r % d == 0){
4186                             current_packet_size=0;
4187 #ifndef ALT_BITSTREAM_WRITER
4188                             s->pb.buf_ptr= s->ptr_lastgob;
4189 #endif
4190                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4191                         }
4192                     }
4193
4194                     if (s->avctx->rtp_callback)
4195                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, 0);
4196
4197                     switch(s->codec_id){
4198                     case CODEC_ID_MPEG4:
4199                         ff_mpeg4_encode_video_packet_header(s);
4200                         ff_mpeg4_clean_buffers(s);
4201                     break;
4202                     case CODEC_ID_MPEG1VIDEO:
4203                     case CODEC_ID_MPEG2VIDEO:
4204                         ff_mpeg1_encode_slice_header(s);
4205                         ff_mpeg1_clean_buffers(s);
4206                     break;
4207                     case CODEC_ID_H263:
4208                     case CODEC_ID_H263P:
4209                         h263_encode_gob_header(s, mb_y);
4210                     break;
4211                     }
4212
4213                     if(s->flags&CODEC_FLAG_PASS1){
4214                         int bits= put_bits_count(&s->pb);
4215                         s->misc_bits+= bits - s->last_bits;
4216                         s->last_bits= bits;
4217                     }
4218
4219                     s->ptr_lastgob += current_packet_size;
4220                     s->first_slice_line=1;
4221                     s->resync_mb_x=mb_x;
4222                     s->resync_mb_y=mb_y;
4223                 }
4224             }
4225 #endif
4226
4227             if(  (s->resync_mb_x   == s->mb_x)
4228                && s->resync_mb_y+1 == s->mb_y){
4229                 s->first_slice_line=0;
4230             }
4231
4232             s->mb_skiped=0;
4233             s->dquant=0; //only for QP_RD
4234
4235             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible
4236                 int next_block=0;
4237                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4238
4239                 copy_context_before_encode(&backup_s, s, -1);
4240                 backup_s.pb= s->pb;
4241                 best_s.data_partitioning= s->data_partitioning;
4242                 best_s.partitioned_frame= s->partitioned_frame;
4243                 if(s->data_partitioning){
4244                     backup_s.pb2= s->pb2;
4245                     backup_s.tex_pb= s->tex_pb;
4246                 }
4247
4248                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
4249                     s->mv_dir = MV_DIR_FORWARD;
4250                     s->mv_type = MV_TYPE_16X16;
4251                     s->mb_intra= 0;
4252                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4253                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4254                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
4255                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4256                 }
4257                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
4258                     s->mv_dir = MV_DIR_FORWARD;
4259                     s->mv_type = MV_TYPE_FIELD;
4260                     s->mb_intra= 0;
4261                     for(i=0; i<2; i++){
4262                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4263                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4264                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4265                     }
4266                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
4267                                  &dmin, &next_block, 0, 0);
4268                 }
4269                 if(mb_type&CANDIDATE_MB_TYPE_SKIPED){
4270                     s->mv_dir = MV_DIR_FORWARD;
4271                     s->mv_type = MV_TYPE_16X16;
4272                     s->mb_intra= 0;
4273                     s->mv[0][0][0] = 0;
4274                     s->mv[0][0][1] = 0;
4275                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPED, pb, pb2, tex_pb,
4276                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4277                 }
4278                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
4279                     s->mv_dir = MV_DIR_FORWARD;
4280                     s->mv_type = MV_TYPE_8X8;
4281                     s->mb_intra= 0;
4282                     for(i=0; i<4; i++){
4283                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4284                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4285                     }
4286                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
4287                                  &dmin, &next_block, 0, 0);
4288                 }
4289                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
4290                     s->mv_dir = MV_DIR_FORWARD;
4291                     s->mv_type = MV_TYPE_16X16;
4292                     s->mb_intra= 0;
4293                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4294                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4295                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
4296                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4297                 }
4298                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
4299                     s->mv_dir = MV_DIR_BACKWARD;
4300                     s->mv_type = MV_TYPE_16X16;
4301                     s->mb_intra= 0;
4302                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4303                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4304                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
4305                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
4306                 }
4307                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
4308                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4309                     s->mv_type = MV_TYPE_16X16;
4310                     s->mb_intra= 0;
4311                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4312                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4313                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4314                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4315                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
4316                                  &dmin, &next_block, 0, 0);
4317                 }
4318                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
4319                     int mx= s->b_direct_mv_table[xy][0];
4320                     int my= s->b_direct_mv_table[xy][1];
4321
4322                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4323                     s->mb_intra= 0;
4324 #ifdef CONFIG_RISKY
4325                     ff_mpeg4_set_direct_mv(s, mx, my);
4326 #endif
4327                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
4328                                  &dmin, &next_block, mx, my);
4329                 }
4330                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
4331                     s->mv_dir = MV_DIR_FORWARD;
4332                     s->mv_type = MV_TYPE_FIELD;
4333                     s->mb_intra= 0;
4334                     for(i=0; i<2; i++){
4335                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
4336                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
4337                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
4338                     }
4339                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
4340                                  &dmin, &next_block, 0, 0);
4341                 }
4342                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
4343                     s->mv_dir = MV_DIR_BACKWARD;
4344                     s->mv_type = MV_TYPE_FIELD;
4345                     s->mb_intra= 0;
4346                     for(i=0; i<2; i++){
4347                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
4348                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
4349                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
4350                     }
4351                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
4352                                  &dmin, &next_block, 0, 0);
4353                 }
4354                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
4355                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4356                     s->mv_type = MV_TYPE_FIELD;
4357                     s->mb_intra= 0;
4358                     for(dir=0; dir<2; dir++){
4359                         for(i=0; i<2; i++){
4360                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
4361                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
4362                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
4363                         }
4364                     }
4365                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
4366                                  &dmin, &next_block, 0, 0);
4367                 }
4368                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
4369                     s->mv_dir = 0;
4370                     s->mv_type = MV_TYPE_16X16;
4371                     s->mb_intra= 1;
4372                     s->mv[0][0][0] = 0;
4373                     s->mv[0][0][1] = 0;
4374                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
4375                                  &dmin, &next_block, 0, 0);
4376                     if(s->h263_pred || s->h263_aic){
4377                         if(best_s.mb_intra)
4378                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
4379                         else
4380                             ff_clean_intra_table_entries(s); //old mode?
4381                     }
4382                 }
4383
4384                 if(s->flags & CODEC_FLAG_QP_RD){
4385                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
4386                         const int last_qp= backup_s.qscale;
4387                         int dquant, dir, qp, dc[6];
4388                         DCTELEM ac[6][16];
4389                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
4390
4391                         assert(backup_s.dquant == 0);
4392
4393                         //FIXME intra
4394                         s->mv_dir= best_s.mv_dir;
4395                         s->mv_type = MV_TYPE_16X16;
4396                         s->mb_intra= best_s.mb_intra;
4397                         s->mv[0][0][0] = best_s.mv[0][0][0];
4398                         s->mv[0][0][1] = best_s.mv[0][0][1];
4399                         s->mv[1][0][0] = best_s.mv[1][0][0];
4400                         s->mv[1][0][1] = best_s.mv[1][0][1];
4401
4402                         dir= s->pict_type == B_TYPE ? 2 : 1;
4403                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
4404                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
4405                             qp= last_qp + dquant;
4406                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
4407                                 break;
4408                             backup_s.dquant= dquant;
4409                             if(s->mb_intra){
4410                                 for(i=0; i<6; i++){
4411                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
4412                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
4413                                 }
4414                             }
4415
4416                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
4417                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
4418                             if(best_s.qscale != qp){
4419                                 if(s->mb_intra){
4420                                     for(i=0; i<6; i++){
4421                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
4422                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
4423                                     }
4424                                 }
4425                                 if(dir > 0 && dquant==dir){
4426                                     dquant= 0;
4427                                     dir= -dir;
4428                                 }else
4429                                     break;
4430                             }
4431                         }
4432                         qp= best_s.qscale;
4433                         s->current_picture.qscale_table[xy]= qp;
4434                     }
4435                 }
4436
4437                 copy_context_after_encode(s, &best_s, -1);
4438
4439                 pb_bits_count= put_bits_count(&s->pb);
4440                 flush_put_bits(&s->pb);
4441                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
4442                 s->pb= backup_s.pb;
4443
4444                 if(s->data_partitioning){
4445                     pb2_bits_count= put_bits_count(&s->pb2);
4446                     flush_put_bits(&s->pb2);
4447                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
4448                     s->pb2= backup_s.pb2;
4449
4450                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
4451                     flush_put_bits(&s->tex_pb);
4452                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
4453                     s->tex_pb= backup_s.tex_pb;
4454                 }
4455                 s->last_bits= put_bits_count(&s->pb);
4456
4457 #ifdef CONFIG_RISKY
4458                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
4459                     ff_h263_update_motion_val(s);
4460 #endif
4461
4462                 if(next_block==0){ //FIXME 16 vs linesize16
4463                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
4464                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
4465                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
4466                 }
4467
4468                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
4469                     MPV_decode_mb(s, s->block);
4470             } else {
4471                 int motion_x, motion_y;
4472                 s->mv_type=MV_TYPE_16X16;
4473                 // only one MB-Type possible
4474
4475                 switch(mb_type){
4476                 case CANDIDATE_MB_TYPE_INTRA:
4477                     s->mv_dir = 0;
4478                     s->mb_intra= 1;
4479                     motion_x= s->mv[0][0][0] = 0;
4480                     motion_y= s->mv[0][0][1] = 0;
4481                     break;
4482                 case CANDIDATE_MB_TYPE_INTER:
4483                     s->mv_dir = MV_DIR_FORWARD;
4484                     s->mb_intra= 0;
4485                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
4486                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
4487                     break;
4488                 case CANDIDATE_MB_TYPE_INTER_I:
4489                     s->mv_dir = MV_DIR_FORWARD;
4490                     s->mv_type = MV_TYPE_FIELD;
4491                     s->mb_intra= 0;
4492                     for(i=0; i<2; i++){
4493                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4494                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4495                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4496                     }
4497                     motion_x = motion_y = 0;
4498                     break;
4499                 case CANDIDATE_MB_TYPE_INTER4V:
4500                     s->mv_dir = MV_DIR_FORWARD;
4501                     s->mv_type = MV_TYPE_8X8;
4502                     s->mb_intra= 0;
4503                     for(i=0; i<4; i++){
4504                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4505                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4506                     }
4507                     motion_x= motion_y= 0;
4508                     break;
4509                 case CANDIDATE_MB_TYPE_DIRECT:
4510                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4511                     s->mb_intra= 0;
4512                     motion_x=s->b_direct_mv_table[xy][0];
4513                     motion_y=s->b_direct_mv_table[xy][1];
4514 #ifdef CONFIG_RISKY
4515                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
4516 #endif
4517                     break;
4518                 case CANDIDATE_MB_TYPE_BIDIR:
4519                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4520                     s->mb_intra= 0;
4521                     motion_x=0;
4522                     motion_y=0;
4523                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4524                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4525                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4526                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4527                     break;
4528                 case CANDIDATE_MB_TYPE_BACKWARD:
4529                     s->mv_dir = MV_DIR_BACKWARD;
4530                     s->mb_intra= 0;
4531                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4532                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4533                     break;
4534                 case CANDIDATE_MB_TYPE_FORWARD:
4535                     s->mv_dir = MV_DIR_FORWARD;
4536                     s->mb_intra= 0;
4537                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4538                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4539 //                    printf(" %d %d ", motion_x, motion_y);
4540                     break;
4541                 case CANDIDATE_MB_TYPE_FORWARD_I:
4542                     s->mv_dir = MV_DIR_FORWARD;
4543                     s->mv_type = MV_TYPE_FIELD;
4544                     s->mb_intra= 0;
4545                     for(i=0; i<2; i++){
4546                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
4547                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
4548                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
4549                     }
4550                     motion_x=motion_y=0;
4551                     break;
4552                 case CANDIDATE_MB_TYPE_BACKWARD_I:
4553                     s->mv_dir = MV_DIR_BACKWARD;
4554                     s->mv_type = MV_TYPE_FIELD;
4555                     s->mb_intra= 0;
4556                     for(i=0; i<2; i++){
4557                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
4558                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
4559                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
4560                     }
4561                     motion_x=motion_y=0;
4562                     break;
4563                 case CANDIDATE_MB_TYPE_BIDIR_I:
4564                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4565                     s->mv_type = MV_TYPE_FIELD;
4566                     s->mb_intra= 0;
4567                     for(dir=0; dir<2; dir++){
4568                         for(i=0; i<2; i++){
4569                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
4570                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
4571                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
4572                         }
4573                     }
4574                     motion_x=motion_y=0;
4575                     break;
4576                 default:
4577                     motion_x=motion_y=0; //gcc warning fix
4578                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
4579                 }
4580
4581                 encode_mb(s, motion_x, motion_y);
4582
4583                 // RAL: Update last macrobloc type
4584                 s->last_mv_dir = s->mv_dir;
4585
4586 #ifdef CONFIG_RISKY
4587                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
4588                     ff_h263_update_motion_val(s);
4589 #endif
4590
4591                 MPV_decode_mb(s, s->block);
4592             }
4593
4594             /* clean the MV table in IPS frames for direct mode in B frames */
4595             if(s->mb_intra /* && I,P,S_TYPE */){
4596                 s->p_mv_table[xy][0]=0;
4597                 s->p_mv_table[xy][1]=0;
4598             }
4599
4600             if(s->flags&CODEC_FLAG_PSNR){
4601                 int w= 16;
4602                 int h= 16;
4603
4604                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4605                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4606
4607                 s->current_picture_ptr->error[0] += sse(
4608                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
4609                     s->dest[0], w, h, s->linesize);
4610                 s->current_picture_ptr->error[1] += sse(
4611                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
4612                     s->dest[1], w>>1, h>>1, s->uvlinesize);
4613                 s->current_picture_ptr->error[2] += sse(
4614                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
4615                     s->dest[2], w>>1, h>>1, s->uvlinesize);
4616             }
4617             if(s->loop_filter)
4618                 ff_h263_loop_filter(s);
4619 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
4620         }
4621     }
4622
4623 #ifdef CONFIG_RISKY
4624     //not beautifull here but we must write it before flushing so it has to be here
4625     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
4626         msmpeg4_encode_ext_header(s);
4627 #endif
4628
4629     write_slice_end(s);
4630
4631     /* Send the last GOB if RTP */
4632     if (s->avctx->rtp_callback) {
4633         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
4634         /* Call the RTP callback to send the last GOB */
4635         emms_c();
4636         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, 0);
4637     }
4638
4639     return 0;
4640 }
4641
/**
 * Adds src->field onto dst->field and then clears src->field, so a per-thread
 * value is accounted for exactly once.
 * Pointers named dst and src must be in scope where the macro is used.
 * The body is wrapped in do { } while (0) so that MERGE(x); is one single
 * statement and stays correct under an unbraced if/else or loop.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
4643 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
4644     MERGE(me.scene_change_score);
4645     MERGE(me.mc_mb_var_sum_temp);
4646     MERGE(me.mb_var_sum_temp);
4647 }
4648
4649 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
4650     int i;
4651
4652     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
4653     MERGE(dct_count[1]);
4654     MERGE(mv_bits);
4655     MERGE(i_tex_bits);
4656     MERGE(p_tex_bits);
4657     MERGE(i_count);
4658     MERGE(f_count);
4659     MERGE(b_count);
4660     MERGE(skip_count);
4661     MERGE(misc_bits);
4662     MERGE(error_count);
4663     MERGE(padding_bug_score);
4664
4665     if(dst->avctx->noise_reduction){
4666         for(i=0; i<64; i++){
4667             MERGE(dct_error_sum[0][i]);
4668             MERGE(dct_error_sum[1][i]);
4669         }
4670     }
4671
4672     assert(put_bits_count(&src->pb) % 8 ==0);
4673     assert(put_bits_count(&dst->pb) % 8 ==0);
4674     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
4675     flush_put_bits(&dst->pb);
4676 }
4677
/**
 * Encodes the current picture.
 *
 * Steps, in order: reset the per-picture statistics, run motion estimation
 * (or, for I-frames, mark every macroblock intra and optionally measure the
 * spatial complexity), merge the per-thread statistics, possibly turn a
 * P-frame into an I-frame on a scene change, choose f_code/b_code, obtain the
 * picture quality from rate control unless qscale is fixed, write the picture
 * header and finally run encode_thread on all thread contexts and merge their
 * results into s.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and handed to the codec
 *                       specific picture header writers
 */
4678 static void encode_picture(MpegEncContext *s, int picture_number)
4679 {
4680     int i;
4681     int bits;
4682
4683     s->picture_number = picture_number;
4684
4685     /* Reset the average MB variance */
4686     s->me.mb_var_sum_temp    =
4687     s->me.mc_mb_var_sum_temp = 0;
4688
4689 #ifdef CONFIG_RISKY
4690     /* we need to initialize some time vars before we can encode b-frames */
4691     // RAL: Condition added for MPEG1VIDEO
4692     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
4693         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
4694 #endif
4695
4696     s->me.scene_change_score=0;
4697
4698 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration
4699
         /* rounding mode: an I-frame sets no_rounding (1 for msmpeg4_version >= 3,
            otherwise 0); any other non-B frame toggles it when flipflop rounding
            is requested or the codec is H263P / MPEG4 */
4700     if(s->pict_type==I_TYPE){
4701         if(s->msmpeg4_version >= 3) s->no_rounding=1;
4702         else                        s->no_rounding=0;
4703     }else if(s->pict_type!=B_TYPE){
4704         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
4705             s->no_rounding ^= 1;
4706     }
4707
4708     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* hand the state prepared above to every additional thread context;
            slot 0 is not updated here (presumably it is s itself - TODO confirm) */
4709     for(i=1; i<s->avctx->thread_count; i++){
4710         ff_update_duplicate_context(s->thread_context[i], s);
4711     }
4712
4713     ff_init_me(s);
4714
4715     /* Estimate motion for every MB */
4716     if(s->pict_type != I_TYPE){
             /* optional pre-pass: only for non-B frames with me_threshold==0, and
                only if pre_me==2 or pre_me is set and the last non-B picture was
                an I-frame */
4717         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
4718             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
4719                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
4720             }
4721         }
4722
4723         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
4724     }else /* if(s->pict_type == I_TYPE) */{
4725         /* I-Frame */
4726         for(i=0; i<s->mb_stride*s->mb_height; i++)
4727             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
4728
4729         if(!s->fixed_qscale){
4730             /* finding spatial complexity for I-frame rate control */
4731             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
4732         }
4733     }
         /* add the statistics gathered by the additional thread contexts to s */
4734     for(i=1; i<s->avctx->thread_count; i++){
4735         merge_context_after_me(s, s->thread_context[i]);
4736     }
         /* publish the merged variance sums in both copies of the current picture */
4737     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
4738     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
4739     emms_c();
4740
         /* scene change: a P-frame whose score exceeds the threshold is encoded
            as an I-frame with every macroblock marked intra */
4741     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
4742         s->pict_type= I_TYPE;
4743         for(i=0; i<s->mb_stride*s->mb_height; i++)
4744             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
4745 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
4746     }
4747
         /* unless umvplus is set: pick f_code (and b_code for B-frames) from the
            estimated vectors and pass every vector table through
            ff_fix_long_mvs() with the chosen code */
4748     if(!s->umvplus){
4749         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
4750             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
4751
                 /* with interlaced ME the field vector tables can raise f_code */
4752             if(s->flags & CODEC_FLAG_INTERLACED_ME){
4753                 int a,b;
4754                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
4755                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
4756                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
4757             }
4758
4759             ff_fix_long_p_mvs(s);
4760             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
4761             if(s->flags & CODEC_FLAG_INTERLACED_ME){
4762                 int j;
4763                 for(i=0; i<2; i++){
4764                     for(j=0; j<2; j++)
4765                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
4766                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
4767                 }
4768             }
4769         }
4770
4771         if(s->pict_type==B_TYPE){
4772             int a, b;
4773
                 /* f_code covers the forward tables (forward and bidir-forward),
                    b_code the backward tables (backward and bidir-backward) */
4774             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
4775             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
4776             s->f_code = FFMAX(a, b);
4777
4778             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
4779             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
4780             s->b_code = FFMAX(a, b);
4781
4782             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
4783             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
4784             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
4785             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
4786             if(s->flags & CODEC_FLAG_INTERLACED_ME){
4787                 int dir, j;
                     /* dir 0 is checked against f_code, dir 1 against b_code */
4788                 for(dir=0; dir<2; dir++){
4789                     for(i=0; i<2; i++){
4790                         for(j=0; j<2; j++){
4791                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
4792                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
4793                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
4794                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
4795                         }
4796                     }
4797                 }
4798             }
4799         }
4800     }
4801
         /* rate control supplies the picture quality unless qscale is fixed */
4802     if (!s->fixed_qscale)
4803         s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr
4804
         /* lambda comes from the first lambda_table entry with adaptive
            quantisation, otherwise from the picture quality */
4805     if(s->adaptive_quant){
4806 #ifdef CONFIG_RISKY
4807         switch(s->codec_id){
4808         case CODEC_ID_MPEG4:
4809             ff_clean_mpeg4_qscales(s);
4810             break;
4811         case CODEC_ID_H263:
4812         case CODEC_ID_H263P:
4813         case CODEC_ID_FLV1:
4814             ff_clean_h263_qscales(s);
4815             break;
4816         }
4817 #endif
4818
4819         s->lambda= s->lambda_table[0];
4820         //FIXME broken
4821     }else
4822         s->lambda= s->current_picture.quality;
4823 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
4824     update_qscale(s);
4825
4826     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
4827         s->qscale= 3; //reduce clipping problems
4828
4829     if (s->out_format == FMT_MJPEG) {
4830         /* for mjpeg, we do include qscale in the matrix */
4831         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
4832         for(i=1;i<64;i++){
4833             int j= s->dsp.idct_permutation[i];
4834
4835             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
4836         }
4837         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
4838                        s->intra_matrix, s->intra_quant_bias, 8, 8);
             /* qscale is already folded into the matrix, so it is pinned to 8 */
4839         s->qscale= 8;
4840     }
4841
4842     //FIXME var duplication
4843     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
4844     s->current_picture.pict_type= s->pict_type;
4845
4846     if(s->current_picture.key_frame)
4847         s->picture_in_gop_number=0;
4848
         /* write the picture header; its size in bits ends up in s->header_bits */
4849     s->last_bits= put_bits_count(&s->pb);
4850     switch(s->out_format) {
4851     case FMT_MJPEG:
4852         mjpeg_picture_header(s);
4853         break;
4854 #ifdef CONFIG_RISKY
4855     case FMT_H263:
4856         if (s->codec_id == CODEC_ID_WMV2)
4857             ff_wmv2_encode_picture_header(s, picture_number);
4858         else if (s->h263_msmpeg4)
4859             msmpeg4_encode_picture_header(s, picture_number);
4860         else if (s->h263_pred)
4861             mpeg4_encode_picture_header(s, picture_number);
4862         else if (s->codec_id == CODEC_ID_RV10)
4863             rv10_encode_picture_header(s, picture_number);
4864         else if (s->codec_id == CODEC_ID_FLV1)
4865             ff_flv_encode_picture_header(s, picture_number);
4866         else
4867             h263_encode_picture_header(s, picture_number);
4868         break;
4869 #endif
4870     case FMT_MPEG1:
4871         mpeg1_encode_picture_header(s, picture_number);
4872         break;
4873     case FMT_H264:
             /* no picture header is written here for this format */
4874         break;
4875     default:
4876         assert(0);
4877     }
4878     bits= put_bits_count(&s->pb);
4879     s->header_bits= bits - s->last_bits;
4880
         /* refresh the additional thread contexts, encode on all thread contexts,
            then merge statistics and bitstreams of the additional ones into s */
4881     for(i=1; i<s->avctx->thread_count; i++){
4882         update_duplicate_context_after_me(s->thread_context[i], s);
4883     }
4884     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
4885     for(i=1; i<s->avctx->thread_count; i++){
4886         merge_context_after_encode(s, s->thread_context[i]);
4887     }
4888     emms_c();
4889 }
4890
4891 #endif //CONFIG_ENCODERS
4892
4893 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
4894     const int intra= s->mb_intra;
4895     int i;
4896
4897     s->dct_count[intra]++;
4898
4899     for(i=0; i<64; i++){
4900         int level= block[i];
4901
4902         if(level){
4903             if(level>0){
4904                 s->dct_error_sum[intra][i] += level;
4905                 level -= s->dct_offset[intra][i];
4906                 if(level<0) level=0;
4907             }else{
4908                 s->dct_error_sum[intra][i] -= level;
4909                 level += s->dct_offset[intra][i];
4910                 if(level>0) level=0;
4911             }
4912             block[i]= level;
4913         }
4914     }
4915 }
4916
4917 #ifdef CONFIG_ENCODERS
4918
/**
 * Rate-distortion optimised ("trellis") quantisation of one 8x8 block.
 *
 * The block is passed through s->dsp.fdct (and the denoiser if
 * s->dct_error_sum is set). For every coefficient up to the last one that
 * plain quantisation would keep, at most two candidate levels are formed.
 * A dynamic programming pass over the scan positions then picks the
 * (run, level) sequence with the smallest
 *     distortion + lambda * code length,
 * where the distortion is measured relative to zeroing the coefficient.
 * The chosen levels are finally written back into block at their permuted
 * scan positions; all other AC positions are zeroed.
 *
 * @param s        encoder context (matrices, scan tables, VLC length tables)
 * @param block    in: data for s->dsp.fdct, out: quantised coefficients
 * @param n        block number; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale. Also indexes s->coded_score.
 * @param qscale   quantiser, selects the row of q_intra_matrix/q_inter_matrix
 * @param overflow set to non-zero if a level may exceed s->max_qcoeff
 * @return scan index of the last non-zero coefficient; -1 if nothing is
 *         left in a non-intra block
 */
4919 static int dct_quantize_trellis_c(MpegEncContext *s,
4920                         DCTELEM *block, int n,
4921                         int qscale, int *overflow){
4922     const int *qmat;
4923     const uint8_t *scantable= s->intra_scantable.scantable;
4924     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4925     int max=0;
4926     unsigned int threshold1, threshold2;
4927     int bias=0;
         /* run_tab[i]/level_tab[i]: run and level of the best code that ends right
            before scan position i; score_tab[i]: score of the best path up to i */
4928     int run_tab[65];
4929     int level_tab[65];
4930     int score_tab[65];
         /* scan positions that are still worth starting a run from */
4931     int survivor[65];
4932     int survivor_count;
         /* best "last coefficient" found so far; a score of 0 means coding nothing */
4933     int last_run=0;
4934     int last_level=0;
4935     int last_score= 0;
4936     int last_i;
         /* coeff[k][i]: k-th candidate level at scan position i,
            coeff_count[i]: number of candidates there (1 or 2) */
4937     int coeff[2][64];
4938     int coeff_count[64];
4939     int qmul, qadd, start_i, last_non_zero, i, dc;
4940     const int esc_length= s->ac_esc_length;
4941     uint8_t * length;
4942     uint8_t * last_length;
4943     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
4944
4945     s->dsp.fdct (block);
4946
4947     if(s->dct_error_sum)
4948         s->denoise_dct(s, block);
         /* step and offset used for the H.263 style reconstruction below; the
            MPEG-1 branch reaches a comparable scale with its final <<= 3 */
4949     qmul= qscale*16;
4950     qadd= ((qscale-1)|1)*8;
4951
4952     if (s->mb_intra) {
4953         int q;
4954         if (!s->h263_aic) {
4955             if (n < 4)
4956                 q = s->y_dc_scale;
4957             else
4958                 q = s->c_dc_scale;
4959             q = q << 3;
4960         } else{
4961             /* For AIC we skip quant/dequant of INTRADC */
4962             q = 1 << 3;
4963             qadd=0;
4964         }
4965
4966         /* note: block[0] is assumed to be positive */
4967         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is done; the trellis only covers the AC part */
4968         start_i = 1;
4969         last_non_zero = 0;
4970         qmat = s->q_intra_matrix[qscale];
4971         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4972             bias= 1<<(QMAT_SHIFT-1);
4973         length     = s->intra_ac_vlc_length;
4974         last_length= s->intra_ac_vlc_last_length;
4975     } else {
4976         start_i = 0;
4977         last_non_zero = -1;
4978         qmat = s->q_inter_matrix[qscale];
4979         length     = s->inter_ac_vlc_length;
4980         last_length= s->inter_ac_vlc_last_length;
4981     }
4982     last_i= start_i;
4983
         /* block[j]*qmat[j] quantises to zero exactly when it lies within
            [-threshold1, threshold1]; the unsigned compare used below tests both
            bounds at once */
4984     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4985     threshold2= (threshold1<<1);
4986
         /* find the last scan position that quantises to a non-zero level */
4987     for(i=63; i>=start_i; i--) {
4988         const int j = scantable[i];
4989         int level = block[j] * qmat[j];
4990
4991         if(((unsigned)(level+threshold1))>threshold2){
4992             last_non_zero = i;
4993             break;
4994         }
4995     }
4996
         /* build the candidate levels for every position up to last_non_zero */
4997     for(i=start_i; i<=last_non_zero; i++) {
4998         const int j = scantable[i];
4999         int level = block[j] * qmat[j];
5000
5001 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5002 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5003         if(((unsigned)(level+threshold1))>threshold2){
                 /* candidate 0 is the normally quantised level, candidate 1 is one
                    step closer to zero */
5004             if(level>0){
5005                 level= (bias + level)>>QMAT_SHIFT;
5006                 coeff[0][i]= level;
5007                 coeff[1][i]= level-1;
5008 //                coeff[2][k]= level-2;
5009             }else{
5010                 level= (bias - level)>>QMAT_SHIFT;
5011                 coeff[0][i]= -level;
5012                 coeff[1][i]= -level+1;
5013 //                coeff[2][k]= -level+2;
5014             }
                 /* a magnitude of 1 has a single candidate, its neighbour would be 0 */
5015             coeff_count[i]= FFMIN(level, 2);
5016             assert(coeff_count[i]);
5017             max |=level;
5018         }else{
                 /* would quantise to zero: the only candidate is +1 or -1,
                    carrying the sign of the product */
5019             coeff[0][i]= (level>>31)|1;
5020             coeff_count[i]= 1;
5021         }
5022     }
5023
5024     *overflow= s->max_qcoeff < max; //overflow might have happened
5025
         /* nothing quantises to non-zero: clear the coefficients and stop */
5026     if(last_non_zero < start_i){
5027         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5028         return last_non_zero;
5029     }
5030
5031     score_tab[start_i]= 0;
5032     survivor[0]= start_i;
5033     survivor_count= 1;
5034
         /* dynamic programming over the scan positions */
5035     for(i=start_i; i<=last_non_zero; i++){
5036         int level_index, j;
5037         const int dct_coeff= ABS(block[ scantable[i] ]);
5038         const int zero_distoration= dct_coeff*dct_coeff;
5039         int best_score=256*256*256*120;
5040         for(level_index=0; level_index < coeff_count[i]; level_index++){
5041             int distoration;
5042             int level= coeff[level_index][i];
5043             const int alevel= ABS(level);
5044             int unquant_coeff;
5045
5046             assert(level);
5047
                 /* magnitude this candidate would be reconstructed to */
5048             if(s->out_format == FMT_H263){
5049                 unquant_coeff= alevel*qmul + qadd;
5050             }else{ //MPEG1
5051                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5052                 if(s->mb_intra){
5053                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5054                         unquant_coeff =   (unquant_coeff - 1) | 1;
5055                 }else{
5056                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5057                         unquant_coeff =   (unquant_coeff - 1) | 1;
5058                 }
5059                 unquant_coeff<<= 3;
5060             }
5061
                 /* squared error of the candidate minus the squared error of coding
                    a zero here; negative means the candidate is the closer one */
5062             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
5063             level+=64;
                 /* levels in -64..63 are looked up in the length tables, everything
                    else is charged the escape length */
5064             if((level&(~127)) == 0){
5065                 for(j=survivor_count-1; j>=0; j--){
5066                     int run= i - survivor[j];
5067                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5068                     score += score_tab[i-run];
5069
5070                     if(score < best_score){
5071                         best_score= score;
5072                         run_tab[i+1]= run;
5073                         level_tab[i+1]= level-64;
5074                     }
5075                 }
5076
                     /* H.263 format: also try this code as the final one of the block,
                        using the separate table for last coefficients */
5077                 if(s->out_format == FMT_H263){
5078                     for(j=survivor_count-1; j>=0; j--){
5079                         int run= i - survivor[j];
5080                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5081                         score += score_tab[i-run];
5082                         if(score < last_score){
5083                             last_score= score;
5084                             last_run= run;
5085                             last_level= level-64;
5086                             last_i= i+1;
5087                         }
5088                     }
5089                 }
5090             }else{
5091                 distoration += esc_length*lambda;
5092                 for(j=survivor_count-1; j>=0; j--){
5093                     int run= i - survivor[j];
5094                     int score= distoration + score_tab[i-run];
5095
5096                     if(score < best_score){
5097                         best_score= score;
5098                         run_tab[i+1]= run;
5099                         level_tab[i+1]= level-64;
5100                     }
5101                 }
5102
5103                 if(s->out_format == FMT_H263){
5104                   for(j=survivor_count-1; j>=0; j--){
5105                         int run= i - survivor[j];
5106                         int score= distoration + score_tab[i-run];
5107                         if(score < last_score){
5108                             last_score= score;
5109                             last_run= run;
5110                             last_level= level-64;
5111                             last_i= i+1;
5112                         }
5113                     }
5114                 }
5115             }
5116         }
5117
5118         score_tab[i+1]= best_score;
5119
             /* drop survivors whose score is worse than the new best score (plus
                lambda for long blocks); the scan stops at the first one kept */
5120         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
5121         if(last_non_zero <= 27){
5122             for(; survivor_count; survivor_count--){
5123                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
5124                     break;
5125             }
5126         }else{
5127             for(; survivor_count; survivor_count--){
5128                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
5129                     break;
5130             }
5131         }
5132
5133         survivor[ survivor_count++ ]= i+1;
5134     }
5135
         /* other formats pick the end of the block here: every position except 0
            is charged an extra 2*lambda (presumably for the end-of-block code) */
5136     if(s->out_format != FMT_H263){
5137         last_score= 256*256*256*120;
5138         for(i= survivor[0]; i<=last_non_zero + 1; i++){
5139             int score= score_tab[i];
5140             if(i) score += lambda*2; //FIXME exacter?
5141
5142             if(score < last_score){
5143                 last_score= score;
5144                 last_i= i;
5145                 last_level= level_tab[i];
5146                 last_run= run_tab[i];
5147             }
5148         }
5149     }
5150
5151     s->coded_score[n] = last_score;
5152
         /* remember |block[0]| for the single coefficient case below, then clear
            everything the trellis is responsible for */
5153     dc= ABS(block[0]);
5154     last_non_zero= last_i - 1;
5155     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5156
5157     if(last_non_zero < start_i)
5158         return last_non_zero;
5159
         /* only coefficient 0 of a non-intra block is left: compare its
            candidates against dropping it altogether */
5160     if(last_non_zero == 0 && start_i == 0){
5161         int best_level= 0;
5162         int best_score= dc * dc;
5163
5164         for(i=0; i<coeff_count[0]; i++){
5165             int level= coeff[i][0];
5166             int alevel= ABS(level);
5167             int unquant_coeff, score, distortion;
5168
5169             if(s->out_format == FMT_H263){
5170                     unquant_coeff= (alevel*qmul + qadd)>>3;
5171             }else{ //MPEG1
5172                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
5173                     unquant_coeff =   (unquant_coeff - 1) | 1;
5174             }
5175             unquant_coeff = (unquant_coeff + 4) >> 3;
5176             unquant_coeff<<= 3 + 3;
5177
5178             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
5179             level+=64;
5180             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
5181             else                    score= distortion + esc_length*lambda;
5182
5183             if(score < best_score){
5184                 best_score= score;
5185                 best_level= level - 64;
5186             }
5187         }
5188         block[0]= best_level;
5189         s->coded_score[n] = best_score - dc*dc;
5190         if(best_level == 0) return -1;
5191         else                return last_non_zero;
5192     }
5193
         /* walk the chosen path backwards along run_tab and store every level at
            its permuted scan position */
5194     i= last_i;
5195     assert(last_level);
5196
5197     block[ perm_scantable[last_non_zero] ]= last_level;
5198     i -= last_run + 1;
5199
5200     for(; i>start_i; i -= run_tab[i] + 1){
5201         block[ perm_scantable[i-1] ]= level_tab[i];
5202     }
5203
5204     return last_non_zero;
5205 }
5206
5207 //#define REFINE_STATS 1
/**
 * Table of scaled cosine products filled in by build_basis(): the first index
 * is the permuted coefficient index, the second one is 8*x + y.
 * It is built on demand; dct_quantize_refine() calls build_basis() while
 * basis[0][0] is still 0.
 */
5208 static int16_t basis[64][64];
5209
5210 static void build_basis(uint8_t *perm){
5211     int i, j, x, y;
5212     emms_c();
5213     for(i=0; i<8; i++){
5214         for(j=0; j<8; j++){
5215             for(y=0; y<8; y++){
5216                 for(x=0; x<8; x++){
5217                     double s= 0.25*(1<<BASIS_SHIFT);
5218                     int index= 8*i + j;
5219                     int perm_index= perm[index];
5220                     if(i==0) s*= sqrt(0.5);
5221                     if(j==0) s*= sqrt(0.5);
5222                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5223                 }
5224             }
5225         }
5226     }
5227 }
5228
5229 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5230                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5231                         int n, int qscale){
5232     int16_t rem[64];
5233     DCTELEM d1[64];
5234     const int *qmat;
5235     const uint8_t *scantable= s->intra_scantable.scantable;
5236     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5237 //    unsigned int threshold1, threshold2;
5238 //    int bias=0;
5239     int run_tab[65];
5240     int prev_run=0;
5241     int prev_level=0;
5242     int qmul, qadd, start_i, last_non_zero, i, dc;
5243     uint8_t * length;
5244     uint8_t * last_length;
5245     int lambda;
5246     int rle_index, run, q, sum;
5247 #ifdef REFINE_STATS
5248 static int count=0;
5249 static int after_last=0;
5250 static int to_zero=0;
5251 static int from_zero=0;
5252 static int raise=0;
5253 static int lower=0;
5254 static int messed_sign=0;
5255 #endif
5256
5257     if(basis[0][0] == 0)
5258         build_basis(s->dsp.idct_permutation);
5259
5260     qmul= qscale*2;
5261     qadd= (qscale-1)|1;
5262     if (s->mb_intra) {
5263         if (!s->h263_aic) {
5264             if (n < 4)
5265                 q = s->y_dc_scale;
5266             else
5267                 q = s->c_dc_scale;
5268         } else{
5269             /* For AIC we skip quant/dequant of INTRADC */
5270             q = 1;
5271             qadd=0;
5272         }
5273         q <<= RECON_SHIFT-3;
5274         /* note: block[0] is assumed to be positive */
5275         dc= block[0]*q;
5276 //        block[0] = (block[0] + (q >> 1)) / q;
5277         start_i = 1;
5278         qmat = s->q_intra_matrix[qscale];
5279 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5280 //            bias= 1<<(QMAT_SHIFT-1);
5281         length     = s->intra_ac_vlc_length;
5282         last_length= s->intra_ac_vlc_last_length;
5283     } else {
5284         dc= 0;
5285         start_i = 0;
5286         qmat = s->q_inter_matrix[qscale];
5287         length     = s->inter_ac_vlc_length;
5288         last_length= s->inter_ac_vlc_last_length;
5289     }
5290     last_non_zero = s->block_last_index[n];
5291
5292 #ifdef REFINE_STATS
5293 {START_TIMER
5294 #endif
5295     dc += (1<<(RECON_SHIFT-1));
5296     for(i=0; i<64; i++){
5297         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly insteadof copying to rem[]
5298     }
5299 #ifdef REFINE_STATS
5300 STOP_TIMER("memset rem[]")}
5301 #endif
5302     sum=0;
5303     for(i=0; i<64; i++){
5304         int one= 36;
5305         int qns=4;
5306         int w;
5307
5308         w= ABS(weight[i]) + qns*one;
5309         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
5310
5311         weight[i] = w;
5312 //        w=weight[i] = (63*qns + (w/2)) / w;
5313
5314         assert(w>0);
5315         assert(w<(1<<6));
5316         sum += w*w;
5317     }
5318     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
5319 #ifdef REFINE_STATS
5320 {START_TIMER
5321 #endif
5322     run=0;
5323     rle_index=0;
5324     for(i=start_i; i<=last_non_zero; i++){
5325         int j= perm_scantable[i];
5326         const int level= block[j];
5327         int coeff;
5328
5329         if(level){
5330             if(level<0) coeff= qmul*level - qadd;
5331             else        coeff= qmul*level + qadd;
5332             run_tab[rle_index++]=run;
5333             run=0;
5334
5335             s->dsp.add_8x8basis(rem, basis[j], coeff);
5336         }else{
5337             run++;
5338         }
5339     }
5340 #ifdef REFINE_STATS
5341 if(last_non_zero>0){
5342 STOP_TIMER("init rem[]")
5343 }
5344 }
5345
5346 {START_TIMER
5347 #endif
5348     for(;;){
5349         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
5350         int best_coeff=0;
5351         int best_change=0;
5352         int run2, best_unquant_change=0, analyze_gradient;
5353 #ifdef REFINE_STATS
5354 {START_TIMER
5355 #endif
5356         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
5357
5358         if(analyze_gradient){
5359 #ifdef REFINE_STATS
5360 {START_TIMER
5361 #endif
5362             for(i=0; i<64; i++){
5363                 int w= weight[i];
5364
5365                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
5366             }
5367 #ifdef REFINE_STATS
5368 STOP_TIMER("rem*w*w")}
5369 {START_TIMER
5370 #endif
5371             s->dsp.fdct(d1);
5372 #ifdef REFINE_STATS
5373 STOP_TIMER("dct")}
5374 #endif
5375         }
5376
5377         if(start_i){
5378             const int level= block[0];
5379             int change, old_coeff;
5380
5381             assert(s->mb_intra);
5382
5383             old_coeff= q*level;
5384
5385             for(change=-1; change<=1; change+=2){
5386                 int new_level= level + change;
5387                 int score, new_coeff;
5388
5389                 new_coeff= q*new_level;
5390                 if(new_coeff >= 2048 || new_coeff < 0)
5391                     continue;
5392
5393                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
5394                 if(score<best_score){
5395                     best_score= score;
5396                     best_coeff= 0;
5397                     best_change= change;
5398                     best_unquant_change= new_coeff - old_coeff;
5399                 }
5400             }
5401         }
5402
5403         run=0;
5404         rle_index=0;
5405         run2= run_tab[rle_index++];
5406         prev_level=0;
5407         prev_run=0;
5408
5409         for(i=start_i; i<64; i++){
5410             int j= perm_scantable[i];
5411             const int level= block[j];
5412             int change, old_coeff;
5413
5414             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
5415                 break;
5416
5417             if(level){
5418                 if(level<0) old_coeff= qmul*level - qadd;
5419                 else        old_coeff= qmul*level + qadd;
5420                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
5421             }else{
5422                 old_coeff=0;
5423                 run2--;
5424                 assert(run2>=0 || i >= last_non_zero );
5425             }
5426
5427             for(change=-1; change<=1; change+=2){
5428                 int new_level= level + change;
5429                 int score, new_coeff, unquant_change;
5430
5431                 score=0;
5432                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
5433                    continue;
5434
5435                 if(new_level){
5436                     if(new_level<0) new_coeff= qmul*new_level - qadd;
5437                     else            new_coeff= qmul*new_level + qadd;
5438                     if(new_coeff >= 2048 || new_coeff <= -2048)
5439                         continue;
5440                     //FIXME check for overflow
5441
5442                     if(level){
5443                         if(level < 63 && level > -63){
5444                             if(i < last_non_zero)
5445                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
5446                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
5447                             else
5448                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
5449                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
5450                         }
5451                     }else{
5452                         assert(ABS(new_level)==1);
5453
5454                         if(analyze_gradient){
5455                             int g= d1[ scantable[i] ];
5456                             if(g && (g^new_level) >= 0)
5457                                 continue;
5458                         }
5459
5460                         if(i < last_non_zero){
5461                             int next_i= i + run2 + 1;
5462                             int next_level= block[ perm_scantable[next_i] ] + 64;
5463
5464                             if(next_level&(~127))
5465                                 next_level= 0;
5466
5467                             if(next_i < last_non_zero)
5468                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
5469                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
5470                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
5471                             else
5472                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
5473                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
5474                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
5475                         }else{
5476                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
5477                             if(prev_level){
5478                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
5479                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
5480                             }
5481                         }
5482                     }
5483                 }else{
5484                     new_coeff=0;
5485                     assert(ABS(level)==1);
5486
5487                     if(i < last_non_zero){
5488                         int next_i= i + run2 + 1;
5489                         int next_level= block[ perm_scantable[next_i] ] + 64;
5490
5491                         if(next_level&(~127))
5492                             next_level= 0;
5493
5494                         if(next_i < last_non_zero)
5495                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
5496                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
5497                                      - length[UNI_AC_ENC_INDEX(run, 65)];
5498                         else
5499                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
5500                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
5501                                      - length[UNI_AC_ENC_INDEX(run, 65)];
5502                     }else{
5503                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
5504                         if(prev_level){
5505                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
5506                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
5507                         }
5508                     }
5509                 }
5510
5511                 score *= lambda;
5512
5513                 unquant_change= new_coeff - old_coeff;
5514                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
5515
5516                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
5517                 if(score<best_score){
5518                     best_score= score;
5519                     best_coeff= i;
5520                     best_change= change;
5521                     best_unquant_change= unquant_change;
5522                 }
5523             }
5524             if(level){
5525                 prev_level= level + 64;
5526                 if(prev_level&(~127))
5527                     prev_level= 0;
5528                 prev_run= run;
5529                 run=0;
5530             }else{
5531                 run++;
5532             }
5533         }
5534 #ifdef REFINE_STATS
5535 STOP_TIMER("iterative step")}
5536 #endif
5537
5538         if(best_change){
5539             int j= perm_scantable[ best_coeff ];
5540
5541             block[j] += best_change;
5542
5543             if(best_coeff > last_non_zero){
5544                 last_non_zero= best_coeff;
5545                 assert(block[j]);
5546 #ifdef REFINE_STATS
5547 after_last++;
5548 #endif
5549             }else{
5550 #ifdef REFINE_STATS
5551 if(block[j]){
5552     if(block[j] - best_change){
5553         if(ABS(block[j]) > ABS(block[j] - best_change)){
5554             raise++;
5555         }else{
5556             lower++;
5557         }
5558     }else{
5559         from_zero++;
5560     }
5561 }else{
5562     to_zero++;
5563 }
5564 #endif
5565                 for(; last_non_zero>=start_i; last_non_zero--){
5566                     if(block[perm_scantable[last_non_zero]])
5567                         break;
5568                 }
5569             }
5570 #ifdef REFINE_STATS
5571 count++;
5572 if(256*256*256*64 % count == 0){
5573     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
5574 }
5575 #endif
5576             run=0;
5577             rle_index=0;
5578             for(i=start_i; i<=last_non_zero; i++){
5579                 int j= perm_scantable[i];
5580                 const int level= block[j];
5581
5582                  if(level){
5583                      run_tab[rle_index++]=run;
5584                      run=0;
5585                  }else{
5586                      run++;
5587                  }
5588             }
5589
5590             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
5591         }else{
5592             break;
5593         }
5594     }
5595 #ifdef REFINE_STATS
5596 if(last_non_zero>0){
5597 STOP_TIMER("iterative search")
5598 }
5599 }
5600 #endif
5601
5602     return last_non_zero;
5603 }
5604
5605 static int dct_quantize_c(MpegEncContext *s,
5606                         DCTELEM *block, int n,
5607                         int qscale, int *overflow)
5608 {
5609     int i, j, level, last_non_zero, q, start_i;
5610     const int *qmat;
5611     const uint8_t *scantable= s->intra_scantable.scantable;
5612     int bias;
5613     int max=0;
5614     unsigned int threshold1, threshold2;
5615
5616     s->dsp.fdct (block);
5617
5618     if(s->dct_error_sum)
5619         s->denoise_dct(s, block);
5620
5621     if (s->mb_intra) {
5622         if (!s->h263_aic) {
5623             if (n < 4)
5624                 q = s->y_dc_scale;
5625             else
5626                 q = s->c_dc_scale;
5627             q = q << 3;
5628         } else
5629             /* For AIC we skip quant/dequant of INTRADC */
5630             q = 1 << 3;
5631
5632         /* note: block[0] is assumed to be positive */
5633         block[0] = (block[0] + (q >> 1)) / q;
5634         start_i = 1;
5635         last_non_zero = 0;
5636         qmat = s->q_intra_matrix[qscale];
5637         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
5638     } else {
5639         start_i = 0;
5640         last_non_zero = -1;
5641         qmat = s->q_inter_matrix[qscale];
5642         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
5643     }
5644     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5645     threshold2= (threshold1<<1);
5646     for(i=63;i>=start_i;i--) {
5647         j = scantable[i];
5648         level = block[j] * qmat[j];
5649
5650         if(((unsigned)(level+threshold1))>threshold2){
5651             last_non_zero = i;
5652             break;
5653         }else{
5654             block[j]=0;
5655         }
5656     }
5657     for(i=start_i; i<=last_non_zero; i++) {
5658         j = scantable[i];
5659         level = block[j] * qmat[j];
5660
5661 //        if(   bias+level >= (1<<QMAT_SHIFT)
5662 //           || bias-level >= (1<<QMAT_SHIFT)){
5663         if(((unsigned)(level+threshold1))>threshold2){
5664             if(level>0){
5665                 level= (bias + level)>>QMAT_SHIFT;
5666                 block[j]= level;
5667             }else{
5668                 level= (bias - level)>>QMAT_SHIFT;
5669                 block[j]= -level;
5670             }
5671             max |=level;
5672         }else{
5673             block[j]=0;
5674         }
5675     }
5676     *overflow= s->max_qcoeff < max; //overflow might have happend
5677
5678     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
5679     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
5680         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
5681
5682     return last_non_zero;
5683 }
5684
5685 #endif //CONFIG_ENCODERS
5686
5687 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
5688                                    DCTELEM *block, int n, int qscale)
5689 {
5690     int i, level, nCoeffs;
5691     const uint16_t *quant_matrix;
5692
5693     nCoeffs= s->block_last_index[n];
5694
5695     if (n < 4)
5696         block[0] = block[0] * s->y_dc_scale;
5697     else
5698         block[0] = block[0] * s->c_dc_scale;
5699     /* XXX: only mpeg1 */
5700     quant_matrix = s->intra_matrix;
5701     for(i=1;i<=nCoeffs;i++) {
5702         int j= s->intra_scantable.permutated[i];
5703         level = block[j];
5704         if (level) {
5705             if (level < 0) {
5706                 level = -level;
5707                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
5708                 level = (level - 1) | 1;
5709                 level = -level;
5710             } else {
5711                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
5712                 level = (level - 1) | 1;
5713             }
5714             block[j] = level;
5715         }
5716     }
5717 }
5718
5719 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
5720                                    DCTELEM *block, int n, int qscale)
5721 {
5722     int i, level, nCoeffs;
5723     const uint16_t *quant_matrix;
5724
5725     nCoeffs= s->block_last_index[n];
5726
5727     quant_matrix = s->inter_matrix;
5728     for(i=0; i<=nCoeffs; i++) {
5729         int j= s->intra_scantable.permutated[i];
5730         level = block[j];
5731         if (level) {
5732             if (level < 0) {
5733                 level = -level;
5734                 level = (((level << 1) + 1) * qscale *
5735                          ((int) (quant_matrix[j]))) >> 4;
5736                 level = (level - 1) | 1;
5737                 level = -level;
5738             } else {
5739                 level = (((level << 1) + 1) * qscale *
5740                          ((int) (quant_matrix[j]))) >> 4;
5741                 level = (level - 1) | 1;
5742             }
5743             block[j] = level;
5744         }
5745     }
5746 }
5747
5748 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
5749                                    DCTELEM *block, int n, int qscale)
5750 {
5751     int i, level, nCoeffs;
5752     const uint16_t *quant_matrix;
5753
5754     if(s->alternate_scan) nCoeffs= 63;
5755     else nCoeffs= s->block_last_index[n];
5756
5757     if (n < 4)
5758         block[0] = block[0] * s->y_dc_scale;
5759     else
5760         block[0] = block[0] * s->c_dc_scale;
5761     quant_matrix = s->intra_matrix;
5762     for(i=1;i<=nCoeffs;i++) {
5763         int j= s->intra_scantable.permutated[i];
5764         level = block[j];
5765         if (level) {
5766             if (level < 0) {
5767                 level = -level;
5768                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
5769                 level = -level;
5770             } else {
5771                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
5772             }
5773             block[j] = level;
5774         }
5775     }
5776 }
5777
5778 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
5779                                    DCTELEM *block, int n, int qscale)
5780 {
5781     int i, level, nCoeffs;
5782     const uint16_t *quant_matrix;
5783     int sum=-1;
5784
5785     if(s->alternate_scan) nCoeffs= 63;
5786     else nCoeffs= s->block_last_index[n];
5787
5788     quant_matrix = s->inter_matrix;
5789     for(i=0; i<=nCoeffs; i++) {
5790         int j= s->intra_scantable.permutated[i];
5791         level = block[j];
5792         if (level) {
5793             if (level < 0) {
5794                 level = -level;
5795                 level = (((level << 1) + 1) * qscale *
5796                          ((int) (quant_matrix[j]))) >> 4;
5797                 level = -level;
5798             } else {
5799                 level = (((level << 1) + 1) * qscale *
5800                          ((int) (quant_matrix[j]))) >> 4;
5801             }
5802             block[j] = level;
5803             sum+=level;
5804         }
5805     }
5806     block[63]^=sum&1;
5807 }
5808
5809 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
5810                                   DCTELEM *block, int n, int qscale)
5811 {
5812     int i, level, qmul, qadd;
5813     int nCoeffs;
5814
5815     assert(s->block_last_index[n]>=0);
5816
5817     qmul = qscale << 1;
5818
5819     if (!s->h263_aic) {
5820         if (n < 4)
5821             block[0] = block[0] * s->y_dc_scale;
5822         else
5823             block[0] = block[0] * s->c_dc_scale;
5824         qadd = (qscale - 1) | 1;
5825     }else{
5826         qadd = 0;
5827     }
5828     if(s->ac_pred)
5829         nCoeffs=63;
5830     else
5831         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
5832
5833     for(i=1; i<=nCoeffs; i++) {
5834         level = block[i];
5835         if (level) {
5836             if (level < 0) {
5837                 level = level * qmul - qadd;
5838             } else {
5839                 level = level * qmul + qadd;
5840             }
5841             block[i] = level;
5842         }
5843     }
5844 }
5845
5846 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
5847                                   DCTELEM *block, int n, int qscale)
5848 {
5849     int i, level, qmul, qadd;
5850     int nCoeffs;
5851
5852     assert(s->block_last_index[n]>=0);
5853
5854     qadd = (qscale - 1) | 1;
5855     qmul = qscale << 1;
5856
5857     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
5858
5859     for(i=0; i<=nCoeffs; i++) {
5860         level = block[i];
5861         if (level) {
5862             if (level < 0) {
5863                 level = level * qmul - qadd;
5864             } else {
5865                 level = level * qmul + qadd;
5866             }
5867             block[i] = level;
5868         }
5869     }
5870 }
5871
5872 static void dct_unquantize_h261_intra_c(MpegEncContext *s,
5873                                   DCTELEM *block, int n, int qscale)
5874 {
5875     int i, level, even;
5876     int nCoeffs;
5877
5878     assert(s->block_last_index[n]>=0);
5879
5880     if (n < 4)
5881         block[0] = block[0] * s->y_dc_scale;
5882     else
5883         block[0] = block[0] * s->c_dc_scale;
5884     even = (qscale & 1)^1;
5885     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
5886
5887     for(i=1; i<=nCoeffs; i++){
5888         level = block[i];
5889         if (level){
5890             if (level < 0){
5891                 level = qscale * ((level << 1) - 1) + even;
5892             }else{
5893                 level = qscale * ((level << 1) + 1) - even;
5894             }
5895         }
5896         block[i] = level;
5897     }
5898 }
5899
5900 static void dct_unquantize_h261_inter_c(MpegEncContext *s,
5901                                   DCTELEM *block, int n, int qscale)
5902 {
5903     int i, level, even;
5904     int nCoeffs;
5905
5906     assert(s->block_last_index[n]>=0);
5907
5908     even = (qscale & 1)^1;
5909
5910     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
5911
5912     for(i=0; i<=nCoeffs; i++){
5913         level = block[i];
5914         if (level){
5915             if (level < 0){
5916                 level = qscale * ((level << 1) - 1) + even;
5917             }else{
5918                 level = qscale * ((level << 1) + 1) - even;
5919             }
5920         }
5921         block[i] = level;
5922     }
5923 }
5924
5925 static const AVOption mpeg4_options[] =
5926 {
5927     AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
5928     AVOPTION_CODEC_INT("ratetol", "number of bits the bitstream is allowed to diverge from the reference"
5929                        "the reference can be CBR (for CBR pass1) or VBR (for pass2)",
5930                        bit_rate_tolerance, 4, 240000000, 8000),
5931     AVOPTION_CODEC_INT("qmin", "minimum quantizer", qmin, 1, 31, 2),
5932     AVOPTION_CODEC_INT("qmax", "maximum quantizer", qmax, 1, 31, 31),
5933     AVOPTION_CODEC_STRING("rc_eq", "rate control equation",
5934                           rc_eq, "tex^qComp,option1,options2", 0),
5935     AVOPTION_CODEC_INT("rc_minrate", "rate control minimum bitrate",
5936                        rc_min_rate, 4, 24000000, 0),
5937     AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
5938                        rc_max_rate, 4, 24000000, 0),
5939     AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggresivity",
5940                           rc_buffer_aggressivity, 4, 24000000, 0),
5941     AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
5942                           rc_initial_cplx, 0., 9999999., 0),
5943     AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
5944                           i_quant_factor, 0., 0., 0),
5945     AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
5946                           i_quant_factor, -999999., 999999., 0),
5947     AVOPTION_CODEC_INT("dct_algo", "dct alghorithm",
5948                        dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
5949     AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
5950                           lumi_masking, 0., 999999., 0),
5951     AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporary complexity masking",
5952                           temporal_cplx_masking, 0., 999999., 0),
5953     AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
5954                           spatial_cplx_masking, 0., 999999., 0),
5955     AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
5956                           p_masking, 0., 999999., 0),
5957     AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
5958                           dark_masking, 0., 999999., 0),
5959     AVOPTION_CODEC_INT("idct_algo", "idct alghorithm",
5960                        idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
5961
5962     AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
5963                        mb_qmin, 0, 8, 0),
5964     AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
5965                        mb_qmin, 0, 8, 0),
5966
5967     AVOPTION_CODEC_INT("me_cmp", "ME compare function",
5968                        me_cmp, 0, 24000000, 0),
5969     AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
5970                        me_sub_cmp, 0, 24000000, 0),
5971
5972
5973     AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
5974                        dia_size, 0, 24000000, 0),
5975     AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors",
5976                        last_predictor_count, 0, 24000000, 0),
5977
5978     AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
5979                        pre_me, 0, 24000000, 0),
5980     AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
5981                        me_pre_cmp, 0, 24000000, 0),
5982
5983     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
5984                        me_range, 0, 24000000, 0),
5985     AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamod size & shape",
5986                        pre_dia_size, 0, 24000000, 0),
5987     AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
5988                        me_subpel_quality, 0, 24000000, 0),
5989     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
5990                        me_range, 0, 24000000, 0),
5991     AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
5992                         flags, CODEC_FLAG_PSNR, 0),
5993     AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
5994                               rc_override),
5995     AVOPTION_SUB(avoptions_common),
5996     AVOPTION_END()
5997 };
5998
5999 #ifdef CONFIG_ENCODERS
6000 #ifdef CONFIG_RISKY
/* Codec descriptor for the "h263" video encoder (CODEC_ID_H263); uses the
   shared MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points
   with sizeof(MpegEncContext), presumably as the private context size. */
AVCodec h263_encoder = {
    "h263",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};
6010
/* Codec descriptor for the "h263p" video encoder (CODEC_ID_H263P); uses the
   shared MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points. */
AVCodec h263p_encoder = {
    "h263p",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263P,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};
6020
/* Codec descriptor for the "flv" video encoder (CODEC_ID_FLV1); uses the
   shared MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points. */
AVCodec flv_encoder = {
    "flv",
    CODEC_TYPE_VIDEO,
    CODEC_ID_FLV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};
6030
/* Codec descriptor for the "rv10" video encoder (CODEC_ID_RV10); uses the
   shared MPV_encode_init / MPV_encode_picture / MPV_encode_end entry points. */
AVCodec rv10_encoder = {
    "rv10",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV10,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};
6040
/* Codec descriptor for the "mpeg4" video encoder (CODEC_ID_MPEG4); uses the
   shared MPV_encode_* entry points, exposes mpeg4_options and sets
   CODEC_CAP_DELAY in its capabilities. */
AVCodec mpeg4_encoder = {
    "mpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG4,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .options = mpeg4_options,
    .capabilities= CODEC_CAP_DELAY,
};
6052
/* Codec descriptor for the "msmpeg4v1" video encoder (CODEC_ID_MSMPEG4V1);
   uses the shared MPV_encode_* entry points and exposes mpeg4_options. */
AVCodec msmpeg4v1_encoder = {
    "msmpeg4v1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .options = mpeg4_options,
};
6063
/* Codec descriptor for the "msmpeg4v2" video encoder (CODEC_ID_MSMPEG4V2);
   uses the shared MPV_encode_* entry points and exposes mpeg4_options. */
AVCodec msmpeg4v2_encoder = {
    "msmpeg4v2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .options = mpeg4_options,
};
6074
/* Codec descriptor for CODEC_ID_MSMPEG4V3; note that its name string is
   "msmpeg4", without a version suffix. Uses the shared MPV_encode_* entry
   points and exposes mpeg4_options. */
AVCodec msmpeg4v3_encoder = {
    "msmpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V3,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .options = mpeg4_options,
};
6085
/* Codec descriptor for the "wmv1" video encoder (CODEC_ID_WMV1); uses the
   shared MPV_encode_* entry points and exposes mpeg4_options. */
AVCodec wmv1_encoder = {
    "wmv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
    .options = mpeg4_options,
};
6096
6097 #endif
6098
/* Codec descriptor for the "mjpeg" video encoder (CODEC_ID_MJPEG); defined
   outside the CONFIG_RISKY section, so it only depends on CONFIG_ENCODERS.
   Uses the shared MPV_encode_* entry points. */
AVCodec mjpeg_encoder = {
    "mjpeg",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MJPEG,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};
6108
6109 #endif //CONFIG_ENCODERS