1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
20  */
21  
22 /**
23  * @file mpegvideo.c
24  * The simplest mpeg encoder (well, it was the simplest!).
25  */ 
26  
27 #include <limits.h>
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31
32 #ifdef USE_FASTMEMCPY
33 #include "fastmemcpy.h"
34 #endif
35
36 //#undef NDEBUG
37 //#include <assert.h>
38
39 #ifdef CONFIG_ENCODERS
40 static void encode_picture(MpegEncContext *s, int picture_number);
41 #endif //CONFIG_ENCODERS
42 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
43                                    DCTELEM *block, int n, int qscale);
44 static void dct_unquantize_mpeg2_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_h263_c(MpegEncContext *s, 
47                                   DCTELEM *block, int n, int qscale);
48 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
49 #ifdef CONFIG_ENCODERS
50 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
51 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
52 static int sse_mb(MpegEncContext *s);
53 #endif //CONFIG_ENCODERS
54
55 #ifdef HAVE_XVMC
56 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
57 extern void XVMC_field_end(MpegEncContext *s);
58 extern void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
59 #endif
60
61 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
62
63
64 /* enable all paranoid tests for rounding, overflows, etc... */
65 //#define PARANOID
66
67 //#define DEBUG
68
69
70 /* for jpeg fast DCT */
71 #define CONST_BITS 14
72
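/* These appear to be the AAN DCT output scale factors in 2.14 fixed point,
   i.e. aanscales[8*u + v] ~= 16384 * f(u) * f(v) with f(0) = 1 and
   f(k) = cos(k*PI/16) * sqrt(2) for k = 1..7; for example
   16384 * cos(PI/16) * sqrt(2) ~= 22725, the second entry below. */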
73 static const uint16_t aanscales[64] = {
74     /* precomputed values scaled up by 14 bits */
75     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
76     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
77     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
78     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
79     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
80     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
81     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
82     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
83 };
84
85 static const uint8_t h263_chroma_roundtab[16] = {
86 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
87     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
88 };
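/* The table above appears to implement the h263/mpeg4 chroma MV rounding used in
   4MV mode: the four luma vectors are summed and h263_chroma_roundtab[sum & 0xf]
   gives the half-pel fraction of the derived chroma vector (roughly sum/8, with
   a bias towards the half-pel position). */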
89
90 #ifdef CONFIG_ENCODERS
91 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
92 static uint8_t default_fcode_tab[MAX_MV*2+1];
93
94 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
95
96 static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
97                            const uint16_t *quant_matrix, int bias, int qmin, int qmax)
98 {
99     int qscale;
100
101     for(qscale=qmin; qscale<=qmax; qscale++){
102         int i;
103         if (s->dsp.fdct == ff_jpeg_fdct_islow) {
104             for(i=0;i<64;i++) {
105                 const int j= s->dsp.idct_permutation[i];
106                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
107                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
108                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
109                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
110                 
111                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
112                                 (qscale * quant_matrix[j]));
113             }
114         } else if (s->dsp.fdct == fdct_ifast) {
115             for(i=0;i<64;i++) {
116                 const int j= s->dsp.idct_permutation[i];
117                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
118                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
119                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
120                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
121                 
122                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
123                                 (aanscales[i] * qscale * quant_matrix[j]));
124             }
125         } else {
126             for(i=0;i<64;i++) {
127                 const int j= s->dsp.idct_permutation[i];
128                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
129                    So 16           <= qscale * quant_matrix[i]             <= 7905
130                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
131                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
132                 */
133                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
134 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
135                 qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
136
137                 if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
138                 qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
139             }
140         }
141     }
142 }
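/* A simplified sketch (not the exact code) of how the tables built above are
   consumed by dct_quantize_c() further down:

       bias  = quant_bias << (QMAT_SHIFT - QUANT_BIAS_SHIFT);
       level = (bias + coeff * qmat[qscale][i]) >> QMAT_SHIFT;

   qmat stores (1<<QMAT_SHIFT)/(qscale*quant_matrix[i]), so the division by the
   quantizer step becomes a multiply and a shift, with bias controlling the
   rounding; qmat16/qmat16_bias are presumably the 16 bit variants for the MMX
   quantizer (note QMAT_SHIFT_MMX above). */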
143 #endif //CONFIG_ENCODERS
144
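/**
 * initializes a ScanTable: scantable keeps the source scan order, permutated[i]
 * is permutation[src_scantable[i]] (the scan with the IDCT permutation of the
 * current DSP implementation applied), and raster_end[i] is the highest
 * permuted index among the first i+1 scan positions.
 */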
145 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
146     int i;
147     int end;
148     
149     st->scantable= src_scantable;
150
151     for(i=0; i<64; i++){
152         int j;
153         j = src_scantable[i];
154         st->permutated[i] = permutation[j];
155 #ifdef ARCH_POWERPC
156         st->inverse[j] = i;
157 #endif
158     }
159     
160     end=-1;
161     for(i=0; i<64; i++){
162         int j;
163         j = st->permutated[i];
164         if(j>end) end=j;
165         st->raster_end[i]= end;
166     }
167 }
168
169 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
170     int i;
171
172     if(matrix){
173         put_bits(pb, 1, 1);
174         for(i=0;i<64;i++) {
175             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
176         }
177     }else
178         put_bits(pb, 1, 0);
179 }
180
181 /* init common dct for both encoder and decoder */
182 int DCT_common_init(MpegEncContext *s)
183 {
184     s->dct_unquantize_h263 = dct_unquantize_h263_c;
185     s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
186     s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
187
188 #ifdef CONFIG_ENCODERS
189     s->dct_quantize= dct_quantize_c;
190 #endif
191         
192 #ifdef HAVE_MMX
193     MPV_common_init_mmx(s);
194 #endif
195 #ifdef ARCH_ALPHA
196     MPV_common_init_axp(s);
197 #endif
198 #ifdef HAVE_MLIB
199     MPV_common_init_mlib(s);
200 #endif
201 #ifdef HAVE_MMI
202     MPV_common_init_mmi(s);
203 #endif
204 #ifdef ARCH_ARMV4L
205     MPV_common_init_armv4l(s);
206 #endif
207 #ifdef ARCH_POWERPC
208     MPV_common_init_ppc(s);
209 #endif
210
211 #ifdef CONFIG_ENCODERS
212     s->fast_dct_quantize= s->dct_quantize;
213
214     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
215         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
216     }
217
218 #endif //CONFIG_ENCODERS
219
220     /* load & permute scantables
221        note: only wmv uses different ones
222     */
223     ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
224     ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
225     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
226     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
227
228     s->picture_structure= PICT_FRAME;
229     
230     return 0;
231 }
232
233 /**
234  * allocates a Picture
235  * The pixels are allocated/set by calling get_buffer() if shared=0
236  */
237 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
238     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't segfault
239     const int mb_array_size= s->mb_stride*s->mb_height;
240     int i;
241     
242     if(shared){
243         assert(pic->data[0]);
244         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
245         pic->type= FF_BUFFER_TYPE_SHARED;
246     }else{
247         int r;
248         
249         assert(!pic->data[0]);
250         
251         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
252         
253         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
254             fprintf(stderr, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
255             return -1;
256         }
257
258         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
259             fprintf(stderr, "get_buffer() failed (stride changed)\n");
260             return -1;
261         }
262
263         if(pic->linesize[1] != pic->linesize[2]){
264             fprintf(stderr, "get_buffer() failed (uv stride missmatch)\n");
265             return -1;
266         }
267
268         s->linesize  = pic->linesize[0];
269         s->uvlinesize= pic->linesize[1];
270     }
271     
272     if(pic->qscale_table==NULL){
273         if (s->encoding) {        
274             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
275             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
276             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
277             CHECKED_ALLOCZ(pic->mb_cmp_score, mb_array_size * sizeof(int32_t))
278         }
279
280         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
281         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
282         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(int))
283         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
284         if(s->out_format == FMT_H264){
285             for(i=0; i<2; i++){
286                 CHECKED_ALLOCZ(pic->motion_val[i], 2 * 16 * s->mb_num * sizeof(uint16_t))
287                 CHECKED_ALLOCZ(pic->ref_index[i] , 4 * s->mb_num * sizeof(uint8_t))
288             }
289         }
290         pic->qstride= s->mb_stride;
291     }
292
293     //it might be nicer if the application would keep track of these but it would require an API change
294     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
295     s->prev_pict_types[0]= s->pict_type;
296     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
297         pic->age= INT_MAX; // skipped MBs in B-frames are quite rare in mpeg1/2 and it's a bit tricky to skip them anyway
298     
299     return 0;
300 fail: //for the CHECKED_ALLOCZ macro
301     return -1;
302 }
303
304 /**
305  * deallocates a picture
306  */
307 static void free_picture(MpegEncContext *s, Picture *pic){
308     int i;
309
310     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
311         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
312     }
313
314     av_freep(&pic->mb_var);
315     av_freep(&pic->mc_mb_var);
316     av_freep(&pic->mb_mean);
317     av_freep(&pic->mb_cmp_score);
318     av_freep(&pic->mbskip_table);
319     av_freep(&pic->qscale_table);
320     av_freep(&pic->mb_type_base);
321     pic->mb_type= NULL;
322     for(i=0; i<2; i++){
323         av_freep(&pic->motion_val[i]);
324         av_freep(&pic->ref_index[i]);
325     }
326     
327     if(pic->type == FF_BUFFER_TYPE_SHARED){
328         for(i=0; i<4; i++){
329             pic->base[i]=
330             pic->data[i]= NULL;
331         }
332         pic->type= 0;        
333     }
334 }
335
336 /* init common structure for both encoder and decoder */
337 int MPV_common_init(MpegEncContext *s)
338 {
339     int y_size, c_size, yc_size, i, mb_array_size, x, y;
340
341     dsputil_init(&s->dsp, s->avctx);
342     DCT_common_init(s);
343
344     s->flags= s->avctx->flags;
345
346     s->mb_width  = (s->width  + 15) / 16;
347     s->mb_height = (s->height + 15) / 16;
348     s->mb_stride = s->mb_width + 1;
349     mb_array_size= s->mb_height * s->mb_stride;
350
351     /* set default edge pos, will be overridden in decode_header if needed */
352     s->h_edge_pos= s->mb_width*16;
353     s->v_edge_pos= s->mb_height*16;
354
355     s->mb_num = s->mb_width * s->mb_height;
356     
357     s->block_wrap[0]=
358     s->block_wrap[1]=
359     s->block_wrap[2]=
360     s->block_wrap[3]= s->mb_width*2 + 2;
361     s->block_wrap[4]=
362     s->block_wrap[5]= s->mb_width + 2;
363
364     y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
365     c_size = (s->mb_width + 2) * (s->mb_height + 2);
366     yc_size = y_size + 2 * c_size;
367
368     /* convert fourcc to upper case */
369     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
370                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
371                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
372                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
373
374     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)          
375                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
376                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16) 
377                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
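    /* e.g. a 'div3' fourcc becomes 'DIV3' (an illustrative example), presumably so
       that later fourcc checks only need to handle the upper case spelling */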
378
379     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
380     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
381
382     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
383
384     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
385     for(y=0; y<s->mb_height; y++){
386         for(x=0; x<s->mb_width; x++){
387             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
388         }
389     }
390     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
391     
392     if (s->encoding) {
393         int mv_table_size= s->mb_stride * (s->mb_height+2) + 1;
394
395         /* Allocate MV tables */
396         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
397         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
398         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
399         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
400         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
401         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
402         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
403         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
404         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
405         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
406         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
407         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
408
409         //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
410         CHECKED_ALLOCZ(s->me.scratchpad,  s->width*2*16*3*sizeof(uint8_t)) 
411         
412         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
413         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
414
415         if(s->codec_id==CODEC_ID_MPEG4){
416             CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
417             CHECKED_ALLOCZ(   s->pb2_buffer, PB_BUFFER_SIZE);
418         }
419         
420         if(s->msmpeg4_version){
421             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
422         }
423         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
424
425         /* Allocate MB type table */
426         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint8_t)) //needed for encoding
427     }
428         
429     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
430     
431     if (s->out_format == FMT_H263 || s->encoding) {
432         int size;
433
434         /* MV prediction */
435         size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
436         CHECKED_ALLOCZ(s->motion_val, size * 2 * sizeof(int16_t));
437     }
438
439     if(s->codec_id==CODEC_ID_MPEG4){
440         /* interlaced direct mode decoding tables */
441         CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t))
442         CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t))
443     }
444     if (s->out_format == FMT_H263) {
445         /* ac values */
446         CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(int16_t) * 16);
447         s->ac_val[1] = s->ac_val[0] + y_size;
448         s->ac_val[2] = s->ac_val[1] + c_size;
449         
450         /* cbp values */
451         CHECKED_ALLOCZ(s->coded_block, y_size);
452         
453         /* divx501 bitstream reorder buffer */
454         CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
455
456         /* cbp, ac_pred, pred_dir */
457         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
458         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
459     }
460     
461     if (s->h263_pred || s->h263_plus || !s->encoding) {
462         /* dc values */
463         //MN: we need these for error resilience of intra-frames
464         CHECKED_ALLOCZ(s->dc_val[0], yc_size * sizeof(int16_t));
465         s->dc_val[1] = s->dc_val[0] + y_size;
466         s->dc_val[2] = s->dc_val[1] + c_size;
467         for(i=0;i<yc_size;i++)
468             s->dc_val[0][i] = 1024;
469     }
470
471     /* which MB is an intra block */
472     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
473     memset(s->mbintra_table, 1, mb_array_size);
474     
475     /* default structure is frame */
476     s->picture_structure = PICT_FRAME;
477     
478     /* init macroblock skip table */
479     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
480     //Note: the +2 is for a quicker mpeg4 slice_end detection
481     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
482     
483     s->block= s->blocks[0];
484
485     s->parse_context.state= -1;
486
487     s->context_initialized = 1;
488     return 0;
489  fail:
490     MPV_common_end(s);
491     return -1;
492 }
493
494
495 //extern int sads;
496
497 /* init common structure for both encoder and decoder */
498 void MPV_common_end(MpegEncContext *s)
499 {
500     int i;
501
502     av_freep(&s->parse_context.buffer);
503     s->parse_context.buffer_size=0;
504
505     av_freep(&s->mb_type);
506     av_freep(&s->p_mv_table_base);
507     av_freep(&s->b_forw_mv_table_base);
508     av_freep(&s->b_back_mv_table_base);
509     av_freep(&s->b_bidir_forw_mv_table_base);
510     av_freep(&s->b_bidir_back_mv_table_base);
511     av_freep(&s->b_direct_mv_table_base);
512     s->p_mv_table= NULL;
513     s->b_forw_mv_table= NULL;
514     s->b_back_mv_table= NULL;
515     s->b_bidir_forw_mv_table= NULL;
516     s->b_bidir_back_mv_table= NULL;
517     s->b_direct_mv_table= NULL;
518     
519     av_freep(&s->motion_val);
520     av_freep(&s->dc_val[0]);
521     av_freep(&s->ac_val[0]);
522     av_freep(&s->coded_block);
523     av_freep(&s->mbintra_table);
524     av_freep(&s->cbp_table);
525     av_freep(&s->pred_dir_table);
526     av_freep(&s->me.scratchpad);
527     av_freep(&s->me.map);
528     av_freep(&s->me.score_map);
529     
530     av_freep(&s->mbskip_table);
531     av_freep(&s->prev_pict_types);
532     av_freep(&s->bitstream_buffer);
533     av_freep(&s->tex_pb_buffer);
534     av_freep(&s->pb2_buffer);
535     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
536     av_freep(&s->field_mv_table);
537     av_freep(&s->field_select_table);
538     av_freep(&s->avctx->stats_out);
539     av_freep(&s->ac_stats);
540     av_freep(&s->error_status_table);
541     av_freep(&s->mb_index2xy);
542
543     for(i=0; i<MAX_PICTURE_COUNT; i++){
544         free_picture(s, &s->picture[i]);
545     }
546     avcodec_default_free_buffers(s->avctx);
547     s->context_initialized = 0;
548 }
549
550 #ifdef CONFIG_ENCODERS
551
552 /* init video encoder */
553 int MPV_encode_init(AVCodecContext *avctx)
554 {
555     MpegEncContext *s = avctx->priv_data;
556     int i, dummy;
557     int chroma_h_shift, chroma_v_shift;
558
559     avctx->pix_fmt = PIX_FMT_YUV420P; // FIXME
560
561     s->bit_rate = avctx->bit_rate;
562     s->bit_rate_tolerance = avctx->bit_rate_tolerance;
563     s->width = avctx->width;
564     s->height = avctx->height;
565     if(avctx->gop_size > 600){
566         fprintf(stderr, "Warning keyframe interval too large! reducing it ...\n");
567         avctx->gop_size=600;
568     }
569     s->gop_size = avctx->gop_size;
570     s->rtp_mode = avctx->rtp_mode;
571     s->rtp_payload_size = avctx->rtp_payload_size;
572     if (avctx->rtp_callback)
573         s->rtp_callback = avctx->rtp_callback;
574     s->max_qdiff= avctx->max_qdiff;
575     s->qcompress= avctx->qcompress;
576     s->qblur= avctx->qblur;
577     s->avctx = avctx;
578     s->flags= avctx->flags;
579     s->max_b_frames= avctx->max_b_frames;
580     s->b_frame_strategy= avctx->b_frame_strategy;
581     s->codec_id= avctx->codec->id;
582     s->luma_elim_threshold  = avctx->luma_elim_threshold;
583     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
584     s->strict_std_compliance= avctx->strict_std_compliance;
585     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
586     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
587     s->mpeg_quant= avctx->mpeg_quant;
588
589     if (s->gop_size <= 1) {
590         s->intra_only = 1;
591         s->gop_size = 12;
592     } else {
593         s->intra_only = 0;
594     }
595
596     s->me_method = avctx->me_method;
597
598     /* Fixed QSCALE */
599     s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
600     
601     s->adaptive_quant= (   s->avctx->lumi_masking
602                         || s->avctx->dark_masking
603                         || s->avctx->temporal_cplx_masking 
604                         || s->avctx->spatial_cplx_masking
605                         || s->avctx->p_masking)
606                        && !s->fixed_qscale;
607     
608     s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
609
610     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4){
611         fprintf(stderr, "4MV not supporetd by codec\n");
612         return -1;
613     }
614     
615     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
616         fprintf(stderr, "qpel not supporetd by codec\n");
617         return -1;
618     }
619
620     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
621         fprintf(stderr, "data partitioning not supporetd by codec\n");
622         return -1;
623     }
624     
625     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
626         fprintf(stderr, "b frames not supporetd by codec\n");
627         return -1;
628     }
629     
630     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
631         fprintf(stderr, "mpeg2 style quantization not supporetd by codec\n");
632         return -1;
633     }
634         
635     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
636         fprintf(stderr, "CBP RD needs trellis quant\n");
637         return -1;
638     }
639
640     if(s->codec_id==CODEC_ID_MJPEG){
641         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
642         s->inter_quant_bias= 0;
643     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
644         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
645         s->inter_quant_bias= 0;
646     }else{
647         s->intra_quant_bias=0;
648         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
649     }
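    /* In other words (with QUANT_BIAS_SHIFT bits of fraction): a bias b rounds a
       coefficient a with quantizer step x roughly as (a + x*b/2^QUANT_BIAS_SHIFT)/x,
       so 1<<(QUANT_BIAS_SHIFT-1) is round-to-nearest, 3<<(QUANT_BIAS_SHIFT-3) rounds
       at 3/8 of a step and -(1<<(QUANT_BIAS_SHIFT-2)) biases a quarter step towards
       zero, matching the per-branch comments above. */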
650     
651     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
652         s->intra_quant_bias= avctx->intra_quant_bias;
653     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
654         s->inter_quant_bias= avctx->inter_quant_bias;
655         
656     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
657
658     av_reduce(&s->time_increment_resolution, &dummy, s->avctx->frame_rate, s->avctx->frame_rate_base, (1<<16)-1);
659     s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
660
661     switch(avctx->codec->id) {
662     case CODEC_ID_MPEG1VIDEO:
663         s->out_format = FMT_MPEG1;
664         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
665         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
666         break;
667     case CODEC_ID_MPEG2VIDEO:
668         s->out_format = FMT_MPEG1;
669         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
670         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
671         s->rtp_mode= 1; // mpeg2 must have slices
672         if(s->rtp_payload_size == 0) s->rtp_payload_size= 256*256*256;
673         break;
674     case CODEC_ID_LJPEG:
675     case CODEC_ID_MJPEG:
676         s->out_format = FMT_MJPEG;
677         s->intra_only = 1; /* force intra only for jpeg */
678         s->mjpeg_write_tables = 1; /* write all tables */
679         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
680         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
681         s->mjpeg_vsample[1] = 1;
682         s->mjpeg_vsample[2] = 1; 
683         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
684         s->mjpeg_hsample[1] = 1; 
685         s->mjpeg_hsample[2] = 1; 
686         if (mjpeg_init(s) < 0)
687             return -1;
688         avctx->delay=0;
689         s->low_delay=1;
690         break;
691 #ifdef CONFIG_RISKY
692     case CODEC_ID_H263:
693         if (h263_get_picture_format(s->width, s->height) == 7) {
694             printf("Input picture size isn't suitable for h263 codec! try h263+\n");
695             return -1;
696         }
697         s->out_format = FMT_H263;
698         avctx->delay=0;
699         s->low_delay=1;
700         break;
701     case CODEC_ID_H263P:
702         s->out_format = FMT_H263;
703         s->h263_plus = 1;
704         /* Fx */
705         s->unrestricted_mv=(avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
706         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
707         /* /Fx */
708         /* These are just to be sure */
709         s->umvplus = 1;
710         avctx->delay=0;
711         s->low_delay=1;
712         break;
713     case CODEC_ID_FLV1:
714         s->out_format = FMT_H263;
715         s->h263_flv = 2; /* format = 1; 11-bit codes */
716         s->unrestricted_mv = 1;
717         s->rtp_mode=0; /* don't allow GOB */
718         avctx->delay=0;
719         s->low_delay=1;
720         break;
721     case CODEC_ID_RV10:
722         s->out_format = FMT_H263;
723         s->h263_rv10 = 1;
724         avctx->delay=0;
725         s->low_delay=1;
726         break;
727     case CODEC_ID_MPEG4:
728         s->out_format = FMT_H263;
729         s->h263_pred = 1;
730         s->unrestricted_mv = 1;
731         s->low_delay= s->max_b_frames ? 0 : 1;
732         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
733         break;
734     case CODEC_ID_MSMPEG4V1:
735         s->out_format = FMT_H263;
736         s->h263_msmpeg4 = 1;
737         s->h263_pred = 1;
738         s->unrestricted_mv = 1;
739         s->msmpeg4_version= 1;
740         avctx->delay=0;
741         s->low_delay=1;
742         break;
743     case CODEC_ID_MSMPEG4V2:
744         s->out_format = FMT_H263;
745         s->h263_msmpeg4 = 1;
746         s->h263_pred = 1;
747         s->unrestricted_mv = 1;
748         s->msmpeg4_version= 2;
749         avctx->delay=0;
750         s->low_delay=1;
751         break;
752     case CODEC_ID_MSMPEG4V3:
753         s->out_format = FMT_H263;
754         s->h263_msmpeg4 = 1;
755         s->h263_pred = 1;
756         s->unrestricted_mv = 1;
757         s->msmpeg4_version= 3;
758         s->flipflop_rounding=1;
759         avctx->delay=0;
760         s->low_delay=1;
761         break;
762     case CODEC_ID_WMV1:
763         s->out_format = FMT_H263;
764         s->h263_msmpeg4 = 1;
765         s->h263_pred = 1;
766         s->unrestricted_mv = 1;
767         s->msmpeg4_version= 4;
768         s->flipflop_rounding=1;
769         avctx->delay=0;
770         s->low_delay=1;
771         break;
772     case CODEC_ID_WMV2:
773         s->out_format = FMT_H263;
774         s->h263_msmpeg4 = 1;
775         s->h263_pred = 1;
776         s->unrestricted_mv = 1;
777         s->msmpeg4_version= 5;
778         s->flipflop_rounding=1;
779         avctx->delay=0;
780         s->low_delay=1;
781         break;
782 #endif
783     default:
784         return -1;
785     }
786     
787     { /* set up some safe defaults, some codecs might override them later */
788         static int done=0;
789         if(!done){
790             int i;
791             done=1;
792
793             default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
794             memset(default_mv_penalty, 0, sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
795             memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
796
797             for(i=-16; i<16; i++){
798                 default_fcode_tab[i + MAX_MV]= 1;
799             }
800         }
801     }
802     s->me.mv_penalty= default_mv_penalty;
803     s->fcode_tab= default_fcode_tab;
804     s->y_dc_scale_table=
805     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
806  
807     /* don't use the mv_penalty table for bad MVs as it would be confused */
808     //FIXME remove after fixing / removing old ME
809     if (s->me_method < ME_EPZS) s->me.mv_penalty = default_mv_penalty;
810
811     s->encoding = 1;
812
813     /* init */
814     if (MPV_common_init(s) < 0)
815         return -1;
816     
817     ff_init_me(s);
818
819 #ifdef CONFIG_ENCODERS
820 #ifdef CONFIG_RISKY
821     if (s->out_format == FMT_H263)
822         h263_encode_init(s);
823     if(s->msmpeg4_version)
824         ff_msmpeg4_encode_init(s);
825 #endif
826     if (s->out_format == FMT_MPEG1)
827         ff_mpeg1_encode_init(s);
828 #endif
829
830     /* init default q matrix */
831     for(i=0;i<64;i++) {
832         int j= s->dsp.idct_permutation[i];
833 #ifdef CONFIG_RISKY
834         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
835             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
836             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
837         }else if(s->out_format == FMT_H263){
838             s->intra_matrix[j] =
839             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
840         }else
841 #endif
842         { /* mpeg1/2 */
843             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
844             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
845         }
846         if(s->avctx->intra_matrix)
847             s->intra_matrix[j] = s->avctx->intra_matrix[i];
848         if(s->avctx->inter_matrix)
849             s->inter_matrix[j] = s->avctx->inter_matrix[i];
850     }
851
852     /* precompute matrix */
853     /* for mjpeg, we do include qscale in the matrix */
854     if (s->out_format != FMT_MJPEG) {
855         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, 
856                        s->intra_matrix, s->intra_quant_bias, 1, 31);
857         convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, 
858                        s->inter_matrix, s->inter_quant_bias, 1, 31);
859     }
860
861     if(ff_rate_control_init(s) < 0)
862         return -1;
863
864     s->picture_number = 0;
865     s->picture_in_gop_number = 0;
866     s->fake_picture_number = 0;
867     /* motion detector init */
868     s->f_code = 1;
869     s->b_code = 1;
870
871     return 0;
872 }
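
#if 0
/* A minimal usage sketch (not part of the original file): how an application of
   this libavcodec era would typically reach MPV_encode_init() above through the
   public API; the settings are arbitrary example values. */
static void example_encode_setup(void)
{
    AVCodec *codec= avcodec_find_encoder(CODEC_ID_MPEG4);
    AVCodecContext *c= avcodec_alloc_context();

    c->bit_rate= 800000;
    c->width= 352;
    c->height= 288;
    c->frame_rate= 25;
    c->frame_rate_base= 1;
    c->gop_size= 12;      /* at most one keyframe every 12 frames */
    c->max_b_frames= 2;   /* only used by codecs that support B-frames */

    if(!codec || avcodec_open(c, codec) < 0){ /* avcodec_open() ends up calling MPV_encode_init() */
        /* handle the error */
    }
    /* ... then avcodec_encode_video() once per frame, avcodec_close() at the end ... */
}
#endif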
873
874 int MPV_encode_end(AVCodecContext *avctx)
875 {
876     MpegEncContext *s = avctx->priv_data;
877
878 #ifdef STATS
879     print_stats();
880 #endif
881
882     ff_rate_control_uninit(s);
883
884     MPV_common_end(s);
885     if (s->out_format == FMT_MJPEG)
886         mjpeg_close(s);
887
888     av_freep(&avctx->extradata);
889       
890     return 0;
891 }
892
893 #endif //CONFIG_ENCODERS
894
895 void init_rl(RLTable *rl)
896 {
897     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
898     uint8_t index_run[MAX_RUN+1];
899     int last, run, level, start, end, i;
900
901     /* compute max_level[], max_run[] and index_run[] */
902     for(last=0;last<2;last++) {
903         if (last == 0) {
904             start = 0;
905             end = rl->last;
906         } else {
907             start = rl->last;
908             end = rl->n;
909         }
910
911         memset(max_level, 0, MAX_RUN + 1);
912         memset(max_run, 0, MAX_LEVEL + 1);
913         memset(index_run, rl->n, MAX_RUN + 1);
914         for(i=start;i<end;i++) {
915             run = rl->table_run[i];
916             level = rl->table_level[i];
917             if (index_run[run] == rl->n)
918                 index_run[run] = i;
919             if (level > max_level[run])
920                 max_level[run] = level;
921             if (run > max_run[level])
922                 max_run[level] = run;
923         }
924         rl->max_level[last] = av_malloc(MAX_RUN + 1);
925         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
926         rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
927         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
928         rl->index_run[last] = av_malloc(MAX_RUN + 1);
929         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
930     }
931 }
932
933 /* draw the edges of width 'w' of an image of size width, height */
934 //FIXME check that this is ok for mpeg4 interlaced
935 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
936 {
937     uint8_t *ptr, *last_line;
938     int i;
939
940     last_line = buf + (height - 1) * wrap;
941     for(i=0;i<w;i++) {
942         /* top and bottom */
943         memcpy(buf - (i + 1) * wrap, buf, width);
944         memcpy(last_line + (i + 1) * wrap, last_line, width);
945     }
946     /* left and right */
947     ptr = buf;
948     for(i=0;i<height;i++) {
949         memset(ptr - w, ptr[0], w);
950         memset(ptr + width, ptr[width-1], w);
951         ptr += wrap;
952     }
953     /* corners */
954     for(i=0;i<w;i++) {
955         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
956         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
957         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
958         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
959     }
960 }
961
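/**
 * returns the index of an unused entry (data[0]==NULL) in s->picture[];
 * for shared pictures only slots with type==0 are considered.
 */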
962 static int find_unused_picture(MpegEncContext *s, int shared){
963     int i;
964     
965     if(shared){
966         for(i=0; i<MAX_PICTURE_COUNT; i++){
967             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) break;
968         }
969     }else{
970         for(i=0; i<MAX_PICTURE_COUNT; i++){
971             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break; //FIXME
972         }
973         for(i=0; i<MAX_PICTURE_COUNT; i++){
974             if(s->picture[i].data[0]==NULL) break;
975         }
976     }
977
978     assert(i<MAX_PICTURE_COUNT);
979     return i;
980 }
981
982 /* generic function for encode/decode called before a frame is coded/decoded */
983 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
984 {
985     int i;
986     AVFrame *pic;
987
988     s->mb_skiped = 0;
989
990     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
991
992     /* mark&release old frames */
993     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr->data[0]) {
994         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
995
996         /* release forgotten pictures */
997         /* if(mpeg124/h263) */
998         if(!s->encoding){
999             for(i=0; i<MAX_PICTURE_COUNT; i++){
1000                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1001                     fprintf(stderr, "releasing zombie picture\n");
1002                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
1003                 }
1004             }
1005         }
1006     }
1007 alloc:
1008     if(!s->encoding){
1009         /* release non-reference frames */
1010         for(i=0; i<MAX_PICTURE_COUNT; i++){
1011             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1012                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1013             }
1014         }
1015
1016         i= find_unused_picture(s, 0);
1017     
1018         pic= (AVFrame*)&s->picture[i];
1019         pic->reference= s->pict_type != B_TYPE ? 3 : 0;
1020
1021         if(s->current_picture_ptr)
1022             pic->coded_picture_number= s->current_picture_ptr->coded_picture_number+1;
1023         
1024         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1025             return -1;
1026
1027         s->current_picture_ptr= &s->picture[i];
1028     }
1029
1030     s->current_picture_ptr->pict_type= s->pict_type;
1031     s->current_picture_ptr->quality= s->qscale;
1032     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1033
1034     s->current_picture= *s->current_picture_ptr;
1035   
1036   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1037     if (s->pict_type != B_TYPE) {
1038         s->last_picture_ptr= s->next_picture_ptr;
1039         s->next_picture_ptr= s->current_picture_ptr;
1040     }
1041     
1042     if(s->last_picture_ptr) s->last_picture= *s->last_picture_ptr;
1043     if(s->next_picture_ptr) s->next_picture= *s->next_picture_ptr;
1044     if(s->new_picture_ptr ) s->new_picture = *s->new_picture_ptr;
1045     
1046     if(s->pict_type != I_TYPE && s->last_picture_ptr==NULL){
1047         fprintf(stderr, "warning: first frame is no keyframe\n");
1048         assert(s->pict_type != B_TYPE); //these should have been dropped if we dont have a reference
1049         goto alloc;
1050     }
1051
1052     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1053
1054     if(s->picture_structure!=PICT_FRAME){
1055         int i;
1056         for(i=0; i<4; i++){
1057             if(s->picture_structure == PICT_BOTTOM_FIELD){
1058                  s->current_picture.data[i] += s->current_picture.linesize[i];
1059             } 
1060             s->current_picture.linesize[i] *= 2;
1061             s->last_picture.linesize[i] *=2;
1062             s->next_picture.linesize[i] *=2;
1063         }
1064     }
1065   }
1066    
1067     s->hurry_up= s->avctx->hurry_up;
1068     s->error_resilience= avctx->error_resilience;
1069
1070     /* set the dequantizer; we can't do it during init as it might change for mpeg4
1071        and we can't do it in the header decode as init isn't called for mpeg4 there yet */
1072     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO) 
1073         s->dct_unquantize = s->dct_unquantize_mpeg2;
1074     else if(s->out_format == FMT_H263)
1075         s->dct_unquantize = s->dct_unquantize_h263;
1076     else 
1077         s->dct_unquantize = s->dct_unquantize_mpeg1;
1078
1079 #ifdef HAVE_XVMC
1080     if(s->avctx->xvmc_acceleration)
1081         return XVMC_field_start(s, avctx);
1082 #endif
1083     return 0;
1084 }
1085
1086 /* generic function for encode/decode called after a frame has been coded/decoded */
1087 void MPV_frame_end(MpegEncContext *s)
1088 {
1089     int i;
1090     /* draw edge for correct motion prediction if outside */
1091 #ifdef HAVE_XVMC
1092 //just to make sure that all data is rendered.
1093     if(s->avctx->xvmc_acceleration){
1094         XVMC_field_end(s);
1095     }else
1096 #endif
1097     if(s->codec_id!=CODEC_ID_SVQ1 && s->out_format != FMT_MPEG1){
1098         if (s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1099             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1100             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1101             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1102         }
1103     }
1104     emms_c();
1105     
1106     s->last_pict_type    = s->pict_type;
1107     if(s->pict_type!=B_TYPE){
1108         s->last_non_b_pict_type= s->pict_type;
1109     }
1110 #if 0
1111         /* copy back current_picture variables */
1112     for(i=0; i<MAX_PICTURE_COUNT; i++){
1113         if(s->picture[i].data[0] == s->current_picture.data[0]){
1114             s->picture[i]= s->current_picture;
1115             break;
1116         }    
1117     }
1118     assert(i<MAX_PICTURE_COUNT);
1119 #endif    
1120
1121     if(s->encoding){
1122         /* release non-reference frames */
1123         for(i=0; i<MAX_PICTURE_COUNT; i++){
1124             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1125                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1126             }
1127         }
1128     }
1129     // clear copies, to avoid confusion
1130 #if 0
1131     memset(&s->last_picture, 0, sizeof(Picture));
1132     memset(&s->next_picture, 0, sizeof(Picture));
1133     memset(&s->current_picture, 0, sizeof(Picture));
1134 #endif
1135 }
1136
1137 /**
1138  * draws a line from (ex, ey) -> (sx, sy).
1139  * @param w width of the image
1140  * @param h height of the image
1141  * @param stride stride/linesize of the image
1142  * @param color color of the line
1143  */
1144 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1145     int t, x, y, f;
1146     
1147     sx= clip(sx, 0, w-1);
1148     sy= clip(sy, 0, h-1);
1149     ex= clip(ex, 0, w-1);
1150     ey= clip(ey, 0, h-1);
1151     
1152     buf[sy*stride + sx]+= color;
1153     
1154     if(ABS(ex - sx) > ABS(ey - sy)){
1155         if(sx > ex){
1156             t=sx; sx=ex; ex=t;
1157             t=sy; sy=ey; ey=t;
1158         }
1159         buf+= sx + sy*stride;
1160         ex-= sx;
1161         f= ((ey-sy)<<16)/ex;
1162         for(x= 0; x <= ex; x++){
1163             y= ((x*f) + (1<<15))>>16;
1164             buf[y*stride + x]+= color;
1165         }
1166     }else{
1167         if(sy > ey){
1168             t=sx; sx=ex; ex=t;
1169             t=sy; sy=ey; ey=t;
1170         }
1171         buf+= sx + sy*stride;
1172         ey-= sy;
1173         if(ey) f= ((ex-sx)<<16)/ey;
1174         else   f= 0;
1175         for(y= 0; y <= ey; y++){
1176             x= ((y*f) + (1<<15))>>16;
1177             buf[y*stride + x]+= color;
1178         }
1179     }
1180 }
1181
1182 /**
1183  * draws an arrow from (ex, ey) -> (sx, sy).
1184  * @param w width of the image
1185  * @param h height of the image
1186  * @param stride stride/linesize of the image
1187  * @param color color of the arrow
1188  */
1189 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ 
1190     int dx,dy;
1191
1192     sx= clip(sx, -100, w+100);
1193     sy= clip(sy, -100, h+100);
1194     ex= clip(ex, -100, w+100);
1195     ey= clip(ey, -100, h+100);
1196     
1197     dx= ex - sx;
1198     dy= ey - sy;
1199     
1200     if(dx*dx + dy*dy > 3*3){
1201         int rx=  dx + dy;
1202         int ry= -dx + dy;
1203         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1204         
1205         //FIXME subpixel accuracy
1206         rx= ROUNDED_DIV(rx*3<<4, length);
1207         ry= ROUNDED_DIV(ry*3<<4, length);
1208         
1209         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1210         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1211     }
1212     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1213 }
1214
1215 /**
1216  * prints debugging info for the given picture.
1217  */
1218 void ff_print_debug_info(MpegEncContext *s, Picture *pict){
1219
1220     if(!pict || !pict->mb_type) return;
1221
1222     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1223         int x,y;
1224
1225         for(y=0; y<s->mb_height; y++){
1226             for(x=0; x<s->mb_width; x++){
1227                 if(s->avctx->debug&FF_DEBUG_SKIP){
1228                     int count= s->mbskip_table[x + y*s->mb_stride];
1229                     if(count>9) count=9;
1230                     printf("%1d", count);
1231                 }
1232                 if(s->avctx->debug&FF_DEBUG_QP){
1233                     printf("%2d", pict->qscale_table[x + y*s->mb_stride]);
1234                 }
1235                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1236                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1237                     
1238                     //Type & MV direction
1239                     if(IS_PCM(mb_type))
1240                         printf("P");
1241                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1242                         printf("A");
1243                     else if(IS_INTRA4x4(mb_type))
1244                         printf("i");
1245                     else if(IS_INTRA16x16(mb_type))
1246                         printf("I");
1247                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1248                         printf("d");
1249                     else if(IS_DIRECT(mb_type))
1250                         printf("D");
1251                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1252                         printf("g");
1253                     else if(IS_GMC(mb_type))
1254                         printf("G");
1255                     else if(IS_SKIP(mb_type))
1256                         printf("S");
1257                     else if(!USES_LIST(mb_type, 1))
1258                         printf(">");
1259                     else if(!USES_LIST(mb_type, 0))
1260                         printf("<");
1261                     else{
1262                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1263                         printf("X");
1264                     }
1265                     
1266                     //segmentation
1267                     if(IS_8X8(mb_type))
1268                         printf("+");
1269                     else if(IS_16X8(mb_type))
1270                         printf("-");
1271                     else if(IS_8X16(mb_type))
1272                         printf("¦");
1273                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1274                         printf(" ");
1275                     else
1276                         printf("?");
1277                     
1278                         
1279                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1280                         printf("=");
1281                     else
1282                         printf(" ");
1283                 }
1284 //                printf(" ");
1285             }
1286             printf("\n");
1287         }
1288     }
1289     
1290     if((s->avctx->debug&FF_DEBUG_VIS_MV) && s->motion_val){
1291         const int shift= 1 + s->quarter_sample;
1292         int mb_y;
1293         uint8_t *ptr= pict->data[0];
1294         s->low_delay=0; //needed to see the vectors without trashing the buffers
1295
1296         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1297             int mb_x;
1298             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1299                 const int mb_index= mb_x + mb_y*s->mb_stride;
1300                 if(IS_8X8(s->current_picture.mb_type[mb_index])){
1301                     int i;
1302                     for(i=0; i<4; i++){
1303                         int sx= mb_x*16 + 4 + 8*(i&1);
1304                         int sy= mb_y*16 + 4 + 8*(i>>1);
1305                         int xy= 1 + mb_x*2 + (i&1) + (mb_y*2 + 1 + (i>>1))*(s->mb_width*2 + 2);
1306                         int mx= (s->motion_val[xy][0]>>shift) + sx;
1307                         int my= (s->motion_val[xy][1]>>shift) + sy;
1308                         draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1309                     }
1310                 }else{
1311                     int sx= mb_x*16 + 8;
1312                     int sy= mb_y*16 + 8;
1313                     int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2);
1314                     int mx= (s->motion_val[xy][0]>>shift) + sx;
1315                     int my= (s->motion_val[xy][1]>>shift) + sy;
1316                     draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1317                 }
1318                 s->mbskip_table[mb_index]=0;
1319             }
1320         }
1321     }
1322 }
1323
1324 #ifdef CONFIG_ENCODERS
1325
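/**
 * returns the sum of absolute differences between the 16x16 block at src
 * and the constant value ref.
 */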
1326 static int get_sae(uint8_t *src, int ref, int stride){
1327     int x,y;
1328     int acc=0;
1329     
1330     for(y=0; y<16; y++){
1331         for(x=0; x<16; x++){
1332             acc+= ABS(src[x+y*stride] - ref);
1333         }
1334     }
1335     
1336     return acc;
1337 }
1338
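/**
 * counts the 16x16 blocks of src that deviate less from their own mean (SAE + 500)
 * than from the co-located block in ref (SAD), i.e. blocks that would probably be
 * cheaper coded as intra; used by the b_frame_strategy==1 heuristic below.
 */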
1339 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1340     int x, y, w, h;
1341     int acc=0;
1342     
1343     w= s->width &~15;
1344     h= s->height&~15;
1345     
1346     for(y=0; y<h; y+=16){
1347         for(x=0; x<w; x+=16){
1348             int offset= x + y*stride;
1349             int sad = s->dsp.pix_abs16x16(src + offset, ref + offset, stride);
1350             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1351             int sae = get_sae(src + offset, mean, stride);
1352             
1353             acc+= sae + 500 < sad;
1354         }
1355     }
1356     return acc;
1357 }
1358
1359
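/**
 * queues the user supplied frame for encoding: it is either referenced directly
 * (if the strides match and the input may be preserved) or copied into an
 * internal Picture, and is then stored in s->input_picture[].
 */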
1360 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
1361     AVFrame *pic=NULL;
1362     int i;
1363     const int encoding_delay= s->max_b_frames;
1364     int direct=1;
1365     
1366   if(pic_arg){
1367     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
1368     if(pic_arg->linesize[0] != s->linesize) direct=0;
1369     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
1370     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
1371   
1372 //    printf("%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
1373     
1374     if(direct){
1375         i= find_unused_picture(s, 1);
1376
1377         pic= (AVFrame*)&s->picture[i];
1378         pic->reference= 3;
1379     
1380         for(i=0; i<4; i++){
1381             pic->data[i]= pic_arg->data[i];
1382             pic->linesize[i]= pic_arg->linesize[i];
1383         }
1384         alloc_picture(s, (Picture*)pic, 1);
1385     }else{
1386         i= find_unused_picture(s, 0);
1387
1388         pic= (AVFrame*)&s->picture[i];
1389         pic->reference= 3;
1390
1391         alloc_picture(s, (Picture*)pic, 0);
1392         for(i=0; i<4; i++){
1393             /* the input will be 16 pixels to the right relative to the actual buffer start
1394              * and the current_pic, so the buffer can be reused; yes, it's not beautiful
1395              */
1396             pic->data[i]+= 16; 
1397         }
1398
1399         if(   pic->data[0] == pic_arg->data[0] 
1400            && pic->data[1] == pic_arg->data[1]
1401            && pic->data[2] == pic_arg->data[2]){
1402        // empty
1403         }else{
1404             int h_chroma_shift, v_chroma_shift;
1405             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1406         
1407             for(i=0; i<3; i++){
1408                 int src_stride= pic_arg->linesize[i];
1409                 int dst_stride= i ? s->uvlinesize : s->linesize;
1410                 int h_shift= i ? h_chroma_shift : 0;
1411                 int v_shift= i ? v_chroma_shift : 0;
1412                 int w= s->width >>h_shift;
1413                 int h= s->height>>v_shift;
1414                 uint8_t *src= pic_arg->data[i];
1415                 uint8_t *dst= pic->data[i];
1416             
1417                 if(src_stride==dst_stride)
1418                     memcpy(dst, src, src_stride*h);
1419                 else{
1420                     while(h--){
1421                         memcpy(dst, src, w);
1422                         dst += dst_stride;
1423                         src += src_stride;
1424                     }
1425                 }
1426             }
1427         }
1428     }
1429     pic->quality= pic_arg->quality;
1430     pic->pict_type= pic_arg->pict_type;
1431     pic->pts = pic_arg->pts;
1432     
1433     if(s->input_picture[encoding_delay])
1434         pic->display_picture_number= s->input_picture[encoding_delay]->display_picture_number + 1;
1435     
1436   }
1437
1438     /* shift buffer entries */
1439     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
1440         s->input_picture[i-1]= s->input_picture[i];
1441         
1442     s->input_picture[encoding_delay]= (Picture*)pic;
1443
1444     return 0;
1445 }
1446
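/**
 * picks the next picture(s) to encode: decides between I/P/B from the GOP
 * position, a user supplied pict_type, 2-pass statistics or the B-frame
 * strategy, and puts them into s->reordered_input_picture[] with the B-frames
 * reordered behind their future reference.
 */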
1447 static void select_input_picture(MpegEncContext *s){
1448     int i;
1449     int coded_pic_num=0;    
1450
1451     if(s->reordered_input_picture[0])
1452         coded_pic_num= s->reordered_input_picture[0]->coded_picture_number + 1;
1453
1454     for(i=1; i<MAX_PICTURE_COUNT; i++)
1455         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
1456     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
1457
1458     /* set next picture types & ordering */
1459     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
1460         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
1461             s->reordered_input_picture[0]= s->input_picture[0];
1462             s->reordered_input_picture[0]->pict_type= I_TYPE;
1463             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1464         }else{
1465             int b_frames;
1466             
1467             if(s->flags&CODEC_FLAG_PASS2){
1468                 for(i=0; i<s->max_b_frames+1; i++){
1469                     int pict_num= s->input_picture[0]->display_picture_number + i;
1470                     int pict_type= s->rc_context.entry[pict_num].new_pict_type;
1471                     s->input_picture[i]->pict_type= pict_type;
1472                     
1473                     if(i + 1 >= s->rc_context.num_entries) break;
1474                 }
1475             }
1476
1477             if(s->input_picture[0]->pict_type){
1478                 /* user selected pict_type */
1479                 for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){
1480                     if(s->input_picture[b_frames]->pict_type!=B_TYPE) break;
1481                 }
1482             
1483                 if(b_frames > s->max_b_frames){
1484                     fprintf(stderr, "warning, too many bframes in a row\n");
1485                     b_frames = s->max_b_frames;
1486                 }
1487             }else if(s->b_frame_strategy==0){
1488                 b_frames= s->max_b_frames;
1489                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
1490             }else if(s->b_frame_strategy==1){
1491                 for(i=1; i<s->max_b_frames+1; i++){
1492                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
1493                         s->input_picture[i]->b_frame_score= 
1494                             get_intra_count(s, s->input_picture[i  ]->data[0], 
1495                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
1496                     }
1497                 }
1498                 for(i=0; i<s->max_b_frames; i++){
1499                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
1500                 }
1501                                 
1502                 b_frames= FFMAX(0, i-1);
1503                 
1504                 /* reset scores */
1505                 for(i=0; i<b_frames+1; i++){
1506                     s->input_picture[i]->b_frame_score=0;
1507                 }
1508             }else{
1509                 fprintf(stderr, "illegal b frame strategy\n");
1510                 b_frames=0;
1511             }
1512
1513             emms_c();
1514 //static int b_count=0;
1515 //b_count+= b_frames;
1516 //printf("b_frames: %d\n", b_count);
1517                         
1518             s->reordered_input_picture[0]= s->input_picture[b_frames];
1519             if(   s->picture_in_gop_number + b_frames >= s->gop_size 
1520                || s->reordered_input_picture[0]->pict_type== I_TYPE)
1521                 s->reordered_input_picture[0]->pict_type= I_TYPE;
1522             else
1523                 s->reordered_input_picture[0]->pict_type= P_TYPE;
1524             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1525             for(i=0; i<b_frames; i++){
1526                 coded_pic_num++;
1527                 s->reordered_input_picture[i+1]= s->input_picture[i];
1528                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
1529                 s->reordered_input_picture[i+1]->coded_picture_number= coded_pic_num;
1530             }
1531         }
1532     }
1533     
1534     if(s->reordered_input_picture[0]){
1535         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
1536
1537         s->new_picture= *s->reordered_input_picture[0];
1538
1539         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
1540             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
1541         
1542             int i= find_unused_picture(s, 0);
1543             Picture *pic= &s->picture[i];
1544
1545             /* mark the shared input picture as unused / free it */
1546             for(i=0; i<4; i++)
1547                 s->reordered_input_picture[0]->data[i]= NULL;
1548             s->reordered_input_picture[0]->type= 0;
1549             
1550             //FIXME bad, copy * except
1551             pic->pict_type = s->reordered_input_picture[0]->pict_type;
1552             pic->quality   = s->reordered_input_picture[0]->quality;
1553             pic->coded_picture_number = s->reordered_input_picture[0]->coded_picture_number;
1554             pic->reference = s->reordered_input_picture[0]->reference;
1555             pic->pts = s->reordered_input_picture[0]->pts;
1556             
1557             alloc_picture(s, pic, 0);
1558
1559             s->current_picture_ptr= pic;
1560         }else{
1561             // input is not a shared pix -> reuse buffer for current_pix
1562
1563             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
1564                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
1565             
1566             s->current_picture_ptr= s->reordered_input_picture[0];
1567             for(i=0; i<4; i++){
1568                 //reverse the +16 we did before storing the input
1569                 s->current_picture_ptr->data[i]-=16;
1570             }
1571         }
1572         s->current_picture= *s->current_picture_ptr;
1573     
1574         s->picture_number= s->new_picture.display_picture_number;
1575 //printf("dpn:%d\n", s->picture_number);
1576     }else{
1577        memset(&s->new_picture, 0, sizeof(Picture));
1578     }
1579 }
1580
1581 int MPV_encode_picture(AVCodecContext *avctx,
1582                        unsigned char *buf, int buf_size, void *data)
1583 {
1584     MpegEncContext *s = avctx->priv_data;
1585     AVFrame *pic_arg = data;
1586     int i;
1587
1588     if(avctx->pix_fmt != PIX_FMT_YUV420P){
1589         fprintf(stderr, "this codec supports only YUV420P\n");
1590         return -1;
1591     }
1592     
1593     init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
1594
1595     s->picture_in_gop_number++;
1596
1597     load_input_picture(s, pic_arg);
1598     
1599     select_input_picture(s);
1600     
1601     /* anything to output? (with B-frames the first few calls may only buffer input) */
1602     if(s->new_picture.data[0]){
1603
1604         s->pict_type= s->new_picture.pict_type;
1605         if (s->fixed_qscale){ /* the ratecontrol needs the last qscale so we don't touch it for CBR */
1606             s->qscale= (int)(s->new_picture.quality+0.5);
1607             assert(s->qscale);
1608         }
1609 //emms_c();
1610 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
1611         MPV_frame_start(s, avctx);
1612
1613         encode_picture(s, s->picture_number);
1614         
1615         avctx->real_pict_num  = s->picture_number;
1616         avctx->header_bits = s->header_bits;
1617         avctx->mv_bits     = s->mv_bits;
1618         avctx->misc_bits   = s->misc_bits;
1619         avctx->i_tex_bits  = s->i_tex_bits;
1620         avctx->p_tex_bits  = s->p_tex_bits;
1621         avctx->i_count     = s->i_count;
1622         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
1623         avctx->skip_count  = s->skip_count;
1624
1625         MPV_frame_end(s);
1626
1627         if (s->out_format == FMT_MJPEG)
1628             mjpeg_picture_trailer(s);
1629         
1630         if(s->flags&CODEC_FLAG_PASS1)
1631             ff_write_pass1_stats(s);
1632
1633         for(i=0; i<4; i++){
1634             avctx->error[i] += s->current_picture_ptr->error[i];
1635         }
1636     }
1637
1638     s->input_picture_number++;
1639
1640     flush_put_bits(&s->pb);
1641     s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
1642     
1643     s->total_bits += s->frame_bits;
1644     avctx->frame_bits  = s->frame_bits;
1645     
1646     return pbBufPtr(&s->pb) - s->pb.buf;
1647 }
1648
1649 #endif //CONFIG_ENCODERS
1650
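/* MPEG-4 global motion compensation, fast path for a single warping point:
 * the whole macroblock is fetched with one constant offset derived from
 * s->sprite_offset, using dsp.gmc1() for fractional offsets or a plain
 * put_pixels call otherwise. */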
1651 static inline void gmc1_motion(MpegEncContext *s,
1652                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1653                                int dest_offset,
1654                                uint8_t **ref_picture, int src_offset)
1655 {
1656     uint8_t *ptr;
1657     int offset, src_x, src_y, linesize, uvlinesize;
1658     int motion_x, motion_y;
1659     int emu=0;
1660
1661     motion_x= s->sprite_offset[0][0];
1662     motion_y= s->sprite_offset[0][1];
1663     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
1664     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
1665     motion_x<<=(3-s->sprite_warping_accuracy);
1666     motion_y<<=(3-s->sprite_warping_accuracy);
1667     src_x = clip(src_x, -16, s->width);
1668     if (src_x == s->width)
1669         motion_x =0;
1670     src_y = clip(src_y, -16, s->height);
1671     if (src_y == s->height)
1672         motion_y =0;
1673
1674     linesize = s->linesize;
1675     uvlinesize = s->uvlinesize;
1676     
1677     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1678
1679     dest_y+=dest_offset;
1680     if(s->flags&CODEC_FLAG_EMU_EDGE){
1681         if(src_x<0 || src_y<0 || src_x + 17 >= s->h_edge_pos
1682                               || src_y + 17 >= s->v_edge_pos){
1683             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1684             ptr= s->edge_emu_buffer;
1685         }
1686     }
1687     
1688     if((motion_x|motion_y)&7){
1689         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1690         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1691     }else{
1692         int dxy;
1693         
1694         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
1695         if (s->no_rounding){
1696             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
1697         }else{
1698             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
1699         }
1700     }
1701     
1702     if(s->flags&CODEC_FLAG_GRAY) return;
1703
1704     motion_x= s->sprite_offset[1][0];
1705     motion_y= s->sprite_offset[1][1];
1706     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
1707     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
1708     motion_x<<=(3-s->sprite_warping_accuracy);
1709     motion_y<<=(3-s->sprite_warping_accuracy);
1710     src_x = clip(src_x, -8, s->width>>1);
1711     if (src_x == s->width>>1)
1712         motion_x =0;
1713     src_y = clip(src_y, -8, s->height>>1);
1714     if (src_y == s->height>>1)
1715         motion_y =0;
1716
1717     offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
1718     ptr = ref_picture[1] + offset;
1719     if(s->flags&CODEC_FLAG_EMU_EDGE){
1720         if(src_x<0 || src_y<0 || src_x + 9 >= s->h_edge_pos>>1
1721                               || src_y + 9 >= s->v_edge_pos>>1){
1722             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1723             ptr= s->edge_emu_buffer;
1724             emu=1;
1725         }
1726     }
1727     s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1728     
1729     ptr = ref_picture[2] + offset;
1730     if(emu){
1731         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1732         ptr= s->edge_emu_buffer;
1733     }
1734     s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1735     
1736     return;
1737 }
1738
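/* General global motion compensation: the source position varies across the
 * macroblock according to the affine parameters in s->sprite_delta, so the
 * per-pixel dsp.gmc() routine is used instead of the block copy primitives. */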
1739 static inline void gmc_motion(MpegEncContext *s,
1740                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1741                                int dest_offset,
1742                                uint8_t **ref_picture, int src_offset)
1743 {
1744     uint8_t *ptr;
1745     int linesize, uvlinesize;
1746     const int a= s->sprite_warping_accuracy;
1747     int ox, oy;
1748
1749     linesize = s->linesize;
1750     uvlinesize = s->uvlinesize;
1751
1752     ptr = ref_picture[0] + src_offset;
1753
1754     dest_y+=dest_offset;
1755     
1756     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
1757     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
1758
1759     s->dsp.gmc(dest_y, ptr, linesize, 16,
1760            ox, 
1761            oy, 
1762            s->sprite_delta[0][0], s->sprite_delta[0][1],
1763            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1764            a+1, (1<<(2*a+1)) - s->no_rounding,
1765            s->h_edge_pos, s->v_edge_pos);
1766     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
1767            ox + s->sprite_delta[0][0]*8, 
1768            oy + s->sprite_delta[1][0]*8, 
1769            s->sprite_delta[0][0], s->sprite_delta[0][1],
1770            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1771            a+1, (1<<(2*a+1)) - s->no_rounding,
1772            s->h_edge_pos, s->v_edge_pos);
1773
1774     if(s->flags&CODEC_FLAG_GRAY) return;
1775
1776
1777     dest_cb+=dest_offset>>1;
1778     dest_cr+=dest_offset>>1;
1779     
1780     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
1781     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
1782
1783     ptr = ref_picture[1] + (src_offset>>1);
1784     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
1785            ox, 
1786            oy, 
1787            s->sprite_delta[0][0], s->sprite_delta[0][1],
1788            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1789            a+1, (1<<(2*a+1)) - s->no_rounding,
1790            s->h_edge_pos>>1, s->v_edge_pos>>1);
1791     
1792     ptr = ref_picture[2] + (src_offset>>1);
1793     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
1794            ox, 
1795            oy, 
1796            s->sprite_delta[0][0], s->sprite_delta[0][1],
1797            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1798            a+1, (1<<(2*a+1)) - s->no_rounding,
1799            s->h_edge_pos>>1, s->v_edge_pos>>1);
1800 }
1801
1802 /**
1803  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
1804  * @param buf destination buffer
1805  * @param src source buffer
1806  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
1807  * @param block_w width of block
1808  * @param block_h height of block
1809  * @param src_x x coordinate of the top left sample of the block in the source buffer
1810  * @param src_y y coordinate of the top left sample of the block in the source buffer
1811  * @param w width of the source buffer
1812  * @param h height of the source buffer
1813  */
1814 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 
1815                                     int src_x, int src_y, int w, int h){
1816     int x, y;
1817     int start_y, start_x, end_y, end_x;
1818
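    /* if the requested block lies completely outside the source, move it so
     * that it still touches the nearest edge row/column; the replication
     * below then fills the whole block from that edge */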
1819     if(src_y>= h){
1820         src+= (h-1-src_y)*linesize;
1821         src_y=h-1;
1822     }else if(src_y<=-block_h){
1823         src+= (1-block_h-src_y)*linesize;
1824         src_y=1-block_h;
1825     }
1826     if(src_x>= w){
1827         src+= (w-1-src_x);
1828         src_x=w-1;
1829     }else if(src_x<=-block_w){
1830         src+= (1-block_w-src_x);
1831         src_x=1-block_w;
1832     }
1833
1834     start_y= FFMAX(0, -src_y);
1835     start_x= FFMAX(0, -src_x);
1836     end_y= FFMIN(block_h, h-src_y);
1837     end_x= FFMIN(block_w, w-src_x);
1838
1839     // copy existing part
1840     for(y=start_y; y<end_y; y++){
1841         for(x=start_x; x<end_x; x++){
1842             buf[x + y*linesize]= src[x + y*linesize];
1843         }
1844     }
1845
1846     //top
1847     for(y=0; y<start_y; y++){
1848         for(x=start_x; x<end_x; x++){
1849             buf[x + y*linesize]= buf[x + start_y*linesize];
1850         }
1851     }
1852
1853     //bottom
1854     for(y=end_y; y<block_h; y++){
1855         for(x=start_x; x<end_x; x++){
1856             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
1857         }
1858     }
1859                                     
1860     for(y=0; y<block_h; y++){
1861        //left
1862         for(x=0; x<start_x; x++){
1863             buf[x + y*linesize]= buf[start_x + y*linesize];
1864         }
1865        
1866        //right
1867         for(x=end_x; x<block_w; x++){
1868             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
1869         }
1870     }
1871 }
1872
1873
1874 /* apply one mpeg motion vector to the three components */
1875 static inline void mpeg_motion(MpegEncContext *s,
1876                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1877                                int dest_offset,
1878                                uint8_t **ref_picture, int src_offset,
1879                                int field_based, op_pixels_func (*pix_op)[4],
1880                                int motion_x, int motion_y, int h)
1881 {
1882     uint8_t *ptr;
1883     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1884     int emu=0;
1885 #if 0    
1886 if(s->quarter_sample)
1887 {
1888     motion_x>>=1;
1889     motion_y>>=1;
1890 }
1891 #endif
1892     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
1893     src_x = s->mb_x * 16 + (motion_x >> 1);
1894     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
1895                 
1896     /* WARNING: do not forget half pels */
1897     height = s->height >> field_based;
1898     v_edge_pos = s->v_edge_pos >> field_based;
1899     src_x = clip(src_x, -16, s->width);
1900     if (src_x == s->width)
1901         dxy &= ~1;
1902     src_y = clip(src_y, -16, height);
1903     if (src_y == height)
1904         dxy &= ~2;
1905     linesize   = s->current_picture.linesize[0] << field_based;
1906     uvlinesize = s->current_picture.linesize[1] << field_based;
1907     ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
1908     dest_y += dest_offset;
1909
1910     if(s->flags&CODEC_FLAG_EMU_EDGE){
1911         if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
1912                               || src_y + (motion_y&1) + h  > v_edge_pos){
1913             ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based,  //FIXME linesize? and uv below
1914                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1915             ptr= s->edge_emu_buffer + src_offset;
1916             emu=1;
1917         }
1918     }
1919     pix_op[0][dxy](dest_y, ptr, linesize, h);
1920
1921     if(s->flags&CODEC_FLAG_GRAY) return;
1922
1923     if (s->out_format == FMT_H263) {
1924         dxy = 0;
1925         if ((motion_x & 3) != 0)
1926             dxy |= 1;
1927         if ((motion_y & 3) != 0)
1928             dxy |= 2;
1929         mx = motion_x >> 2;
1930         my = motion_y >> 2;
1931     } else {
1932         mx = motion_x / 2;
1933         my = motion_y / 2;
1934         dxy = ((my & 1) << 1) | (mx & 1);
1935         mx >>= 1;
1936         my >>= 1;
1937     }
1938     
1939     src_x = s->mb_x * 8 + mx;
1940     src_y = s->mb_y * (8 >> field_based) + my;
1941     src_x = clip(src_x, -8, s->width >> 1);
1942     if (src_x == (s->width >> 1))
1943         dxy &= ~1;
1944     src_y = clip(src_y, -8, height >> 1);
1945     if (src_y == (height >> 1))
1946         dxy &= ~2;
1947     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1948     ptr = ref_picture[1] + offset;
1949     if(emu){
1950         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1951                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1952         ptr= s->edge_emu_buffer + (src_offset >> 1);
1953     }
1954     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1955
1956     ptr = ref_picture[2] + offset;
1957     if(emu){
1958         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1959                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1960         ptr= s->edge_emu_buffer + (src_offset >> 1);
1961     }
1962     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1963 }
1964
1965 static inline void qpel_motion(MpegEncContext *s,
1966                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1967                                int dest_offset,
1968                                uint8_t **ref_picture, int src_offset,
1969                                int field_based, op_pixels_func (*pix_op)[4],
1970                                qpel_mc_func (*qpix_op)[16],
1971                                int motion_x, int motion_y, int h)
1972 {
1973     uint8_t *ptr;
1974     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1975     int emu=0;
1976
1977     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
1978     src_x = s->mb_x * 16 + (motion_x >> 2);
1979     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
1980
1981     height = s->height >> field_based;
1982     v_edge_pos = s->v_edge_pos >> field_based;
1983     src_x = clip(src_x, -16, s->width);
1984     if (src_x == s->width)
1985         dxy &= ~3;
1986     src_y = clip(src_y, -16, height);
1987     if (src_y == height)
1988         dxy &= ~12;
1989     linesize = s->linesize << field_based;
1990     uvlinesize = s->uvlinesize << field_based;
1991     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1992     dest_y += dest_offset;
1993 //printf("%d %d %d\n", src_x, src_y, dxy);
1994     
1995     if(s->flags&CODEC_FLAG_EMU_EDGE){
1996         if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
1997                               || src_y + (motion_y&3) + h  > v_edge_pos){
1998             ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based, 
1999                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2000             ptr= s->edge_emu_buffer + src_offset;
2001             emu=1;
2002         }
2003     }
2004     if(!field_based)
2005         qpix_op[0][dxy](dest_y, ptr, linesize);
2006     else{
2007         //damn interlaced mode
2008         //FIXME boundary mirroring is not exactly correct here
2009         qpix_op[1][dxy](dest_y  , ptr  , linesize);
2010         qpix_op[1][dxy](dest_y+8, ptr+8, linesize);
2011     }
2012
2013     if(s->flags&CODEC_FLAG_GRAY) return;
2014
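    /* derive the chroma vector from the quarter-pel luma vector; the
     * FF_BUG_QPEL_CHROMA* branches presumably reproduce the rounding of known
     * buggy encoders (see workaround_bugs) so their streams decode correctly */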
2015     if(field_based){
2016         mx= motion_x/2;
2017         my= motion_y>>1;
2018     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
2019         static const int rtab[8]= {0,0,1,1,0,0,0,1};
2020         mx= (motion_x>>1) + rtab[motion_x&7];
2021         my= (motion_y>>1) + rtab[motion_y&7];
2022     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
2023         mx= (motion_x>>1)|(motion_x&1);
2024         my= (motion_y>>1)|(motion_y&1);
2025     }else{
2026         mx= motion_x/2;
2027         my= motion_y/2;
2028     }
2029     mx= (mx>>1)|(mx&1);
2030     my= (my>>1)|(my&1);
2031
2032     dxy= (mx&1) | ((my&1)<<1);
2033     mx>>=1;
2034     my>>=1;
2035
2036     src_x = s->mb_x * 8 + mx;
2037     src_y = s->mb_y * (8 >> field_based) + my;
2038     src_x = clip(src_x, -8, s->width >> 1);
2039     if (src_x == (s->width >> 1))
2040         dxy &= ~1;
2041     src_y = clip(src_y, -8, height >> 1);
2042     if (src_y == (height >> 1))
2043         dxy &= ~2;
2044
2045     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
2046     ptr = ref_picture[1] + offset;
2047     if(emu){
2048         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
2049                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2050         ptr= s->edge_emu_buffer + (src_offset >> 1);
2051     }
2052     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
2053     
2054     ptr = ref_picture[2] + offset;
2055     if(emu){
2056         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
2057                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2058         ptr= s->edge_emu_buffer + (src_offset >> 1);
2059     }
2060     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
2061 }
2062
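/* Rounds the sum of the four luma motion vector components (half-pel units)
 * down to a single chroma component using the H.263 chroma rounding table;
 * in this file it is used for the MV_TYPE_8X8 case below. */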
2063 inline int ff_h263_round_chroma(int x){
2064     if (x >= 0)
2065         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2066     else {
2067         x = -x;
2068         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2069     }
2070 }
2071
2072 /**
2073  * motion compensation of a single macroblock
2074  * @param s context
2075  * @param dest_y luma destination pointer
2076  * @param dest_cb chroma cb/u destination pointer
2077  * @param dest_cr chroma cr/v destination pointer
2078  * @param dir direction (0->forward, 1->backward)
2079  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
2080  * @param pix_op halfpel motion compensation function (average or put normally)
2081  * @param qpix_op qpel motion compensation function (average or put normally)
2082  * the motion vectors are taken from s->mv and the MV type from s->mv_type
2083  */
2084 static inline void MPV_motion(MpegEncContext *s, 
2085                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2086                               int dir, uint8_t **ref_picture, 
2087                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
2088 {
2089     int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
2090     int mb_x, mb_y, i;
2091     uint8_t *ptr, *dest;
2092     int emu=0;
2093
2094     mb_x = s->mb_x;
2095     mb_y = s->mb_y;
2096
2097     switch(s->mv_type) {
2098     case MV_TYPE_16X16:
2099 #ifdef CONFIG_RISKY
2100         if(s->mcsel){
2101             if(s->real_sprite_warping_points==1){
2102                 gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
2103                             ref_picture, 0);
2104             }else{
2105                 gmc_motion(s, dest_y, dest_cb, dest_cr, 0,
2106                             ref_picture, 0);
2107             }
2108         }else if(s->quarter_sample){
2109             qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
2110                         ref_picture, 0,
2111                         0, pix_op, qpix_op,
2112                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2113         }else if(s->mspel){
2114             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
2115                         ref_picture, pix_op,
2116                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2117         }else
2118 #endif
2119         {
2120             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2121                         ref_picture, 0,
2122                         0, pix_op,
2123                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2124         }           
2125         break;
2126     case MV_TYPE_8X8:
2127         mx = 0;
2128         my = 0;
2129         if(s->quarter_sample){
2130             for(i=0;i<4;i++) {
2131                 motion_x = s->mv[dir][i][0];
2132                 motion_y = s->mv[dir][i][1];
2133
2134                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2135                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
2136                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
2137                     
2138                 /* WARNING: do not forget half pels */
2139                 src_x = clip(src_x, -16, s->width);
2140                 if (src_x == s->width)
2141                     dxy &= ~3;
2142                 src_y = clip(src_y, -16, s->height);
2143                 if (src_y == s->height)
2144                     dxy &= ~12;
2145                     
2146                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
2147                 if(s->flags&CODEC_FLAG_EMU_EDGE){
2148                     if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
2149                                           || src_y + (motion_y&3) + 8 > s->v_edge_pos){
2150                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2151                         ptr= s->edge_emu_buffer;
2152                     }
2153                 }
2154                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
2155                 qpix_op[1][dxy](dest, ptr, s->linesize);
2156
2157                 mx += s->mv[dir][i][0]/2;
2158                 my += s->mv[dir][i][1]/2;
2159             }
2160         }else{
2161             for(i=0;i<4;i++) {
2162                 motion_x = s->mv[dir][i][0];
2163                 motion_y = s->mv[dir][i][1];
2164
2165                 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2166                 src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
2167                 src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
2168                     
2169                 /* WARNING: do not forget half pels */
2170                 src_x = clip(src_x, -16, s->width);
2171                 if (src_x == s->width)
2172                     dxy &= ~1;
2173                 src_y = clip(src_y, -16, s->height);
2174                 if (src_y == s->height)
2175                     dxy &= ~2;
2176                     
2177                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
2178                 if(s->flags&CODEC_FLAG_EMU_EDGE){
2179                     if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
2180                                           || src_y + (motion_y&1) + 8 > s->v_edge_pos){
2181                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2182                         ptr= s->edge_emu_buffer;
2183                     }
2184                 }
2185                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
2186                 pix_op[1][dxy](dest, ptr, s->linesize, 8);
2187
2188                 mx += s->mv[dir][i][0];
2189                 my += s->mv[dir][i][1];
2190             }
2191         }
2192
2193         if(s->flags&CODEC_FLAG_GRAY) break;
2194         /* In case of 8X8, we construct a single chroma motion vector
2195            with a special rounding */
2196         mx= ff_h263_round_chroma(mx);
2197         my= ff_h263_round_chroma(my);
2198         dxy = ((my & 1) << 1) | (mx & 1);
2199         mx >>= 1;
2200         my >>= 1;
2201
2202         src_x = mb_x * 8 + mx;
2203         src_y = mb_y * 8 + my;
2204         src_x = clip(src_x, -8, s->width/2);
2205         if (src_x == s->width/2)
2206             dxy &= ~1;
2207         src_y = clip(src_y, -8, s->height/2);
2208         if (src_y == s->height/2)
2209             dxy &= ~2;
2210         
2211         offset = (src_y * (s->uvlinesize)) + src_x;
2212         ptr = ref_picture[1] + offset;
2213         if(s->flags&CODEC_FLAG_EMU_EDGE){
2214                 if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
2215                                       || src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
2216                     ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2217                     ptr= s->edge_emu_buffer;
2218                     emu=1;
2219                 }
2220             }
2221         pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);
2222
2223         ptr = ref_picture[2] + offset;
2224         if(emu){
2225             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2226             ptr= s->edge_emu_buffer;
2227         }
2228         pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
2229         break;
2230     case MV_TYPE_FIELD:
2231         if (s->picture_structure == PICT_FRAME) {
2232             if(s->quarter_sample){
2233                 /* top field */
2234                 qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
2235                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2236                             1, pix_op, qpix_op,
2237                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2238                 /* bottom field */
2239                 qpel_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2240                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2241                             1, pix_op, qpix_op,
2242                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2243             }else{
2244                 /* top field */       
2245                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2246                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2247                             1, pix_op,
2248                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2249                 /* bottom field */
2250                 mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2251                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2252                             1, pix_op,
2253                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2254             }
2255         } else {
2256             int offset;
2257             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
2258                 offset= s->field_select[dir][0] ? s->linesize : 0;
2259             }else{
2260                 ref_picture= s->current_picture.data;
2261                 offset= s->field_select[dir][0] ? s->linesize : -s->linesize; 
2262             } 
2263
2264             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2265                         ref_picture, offset,
2266                         0, pix_op,
2267                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2268         }
2269         break;
2270     case MV_TYPE_16X8:{
2271         int offset;
2272          uint8_t ** ref2picture;
2273
2274             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
2275                 ref2picture= ref_picture;
2276                 offset= s->field_select[dir][0] ? s->linesize : 0;
2277             }else{
2278                 ref2picture= s->current_picture.data;
2279                 offset= s->field_select[dir][0] ? s->linesize : -s->linesize; 
2280             } 
2281
2282             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2283                         ref2picture, offset,
2284                         0, pix_op,
2285                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2286
2287
2288             if(s->picture_structure == s->field_select[dir][1] + 1 || s->pict_type == B_TYPE || s->first_field){
2289                 ref2picture= ref_picture;
2290                 offset= s->field_select[dir][1] ? s->linesize : 0;
2291             }else{
2292                 ref2picture= s->current_picture.data;
2293                 offset= s->field_select[dir][1] ? s->linesize : -s->linesize; 
2294             } 
2295             // I know it is ugly but this is the only way to fool emu_edge without rewriting mpeg_motion
2296             mpeg_motion(s, dest_y+16*s->linesize, dest_cb+8*s->uvlinesize, dest_cr+8*s->uvlinesize,
2297                         0,
2298                         ref2picture, offset,
2299                         0, pix_op,
2300                         s->mv[dir][1][0], s->mv[dir][1][1]+16, 8);
2301         }
2302         
2303         break;
2304     case MV_TYPE_DMV:
2305     {
2306     op_pixels_func (*dmv_pix_op)[4];
2307     int offset;
2308
2309         dmv_pix_op = s->dsp.put_pixels_tab;
2310
2311         if(s->picture_structure == PICT_FRAME){
2312             //put top field from top field
2313             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2314                         ref_picture, 0,
2315                         1, dmv_pix_op,
2316                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2317             //put bottom field from bottom field
2318             mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2319                         ref_picture, s->linesize,
2320                         1, dmv_pix_op,
2321                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2322
2323             dmv_pix_op = s->dsp.avg_pixels_tab; 
2324         
2325             //avg top field from bottom field
2326             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2327                         ref_picture, s->linesize,
2328                         1, dmv_pix_op,
2329                         s->mv[dir][2][0], s->mv[dir][2][1], 8);
2330             //avg bottom field from top field
2331             mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2332                         ref_picture, 0,
2333                         1, dmv_pix_op,
2334                         s->mv[dir][3][0], s->mv[dir][3][1], 8);
2335
2336         }else{
2337             offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2338                          s->linesize : 0;
2339
2340             //put field from the same parity
2341             //same parity is never in the same frame
2342             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2343                         ref_picture,offset,
2344                         0,dmv_pix_op,
2345                         s->mv[dir][0][0],s->mv[dir][0][1],16);
2346
2347             // after the put we average into the same block
2348             dmv_pix_op=s->dsp.avg_pixels_tab; 
2349
2350             //opposite parity is always in the same frame if this is the second field
2351             if(!s->first_field){
2352                 ref_picture = s->current_picture.data;    
2353                 //top field is one linesize from the frame beginning
2354                 offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2355                         -s->linesize : s->linesize;
2356             }else 
2357                 offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2358                         0 : s->linesize;
2359
2360             //avg field from the opposite parity
2361             mpeg_motion(s, dest_y, dest_cb, dest_cr,0,
2362                         ref_picture, offset,
2363                         0,dmv_pix_op,
2364                         s->mv[dir][2][0],s->mv[dir][2][1],16);
2365         }
2366     }
2367     break;
2368
2369     }
2370 }
2371
2372
2373 /* put block[] to dest[] */
2374 static inline void put_dct(MpegEncContext *s, 
2375                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2376 {
2377     s->dct_unquantize(s, block, i, s->qscale);
2378     s->dsp.idct_put (dest, line_size, block);
2379 }
2380
2381 /* add block[] to dest[] */
2382 static inline void add_dct(MpegEncContext *s, 
2383                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2384 {
2385     if (s->block_last_index[i] >= 0) {
2386         s->dsp.idct_add (dest, line_size, block);
2387     }
2388 }
2389
2390 static inline void add_dequant_dct(MpegEncContext *s, 
2391                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2392 {
2393     if (s->block_last_index[i] >= 0) {
2394         s->dct_unquantize(s, block, i, s->qscale);
2395
2396         s->dsp.idct_add (dest, line_size, block);
2397     }
2398 }
2399
2400 /**
2401  * cleans the dc, ac and coded_block predictors for the current non-intra MB
2402  */
2403 void ff_clean_intra_table_entries(MpegEncContext *s)
2404 {
2405     int wrap = s->block_wrap[0];
2406     int xy = s->block_index[0];
2407     
2408     s->dc_val[0][xy           ] = 
2409     s->dc_val[0][xy + 1       ] = 
2410     s->dc_val[0][xy     + wrap] =
2411     s->dc_val[0][xy + 1 + wrap] = 1024;
2412     /* ac pred */
2413     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
2414     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
2415     if (s->msmpeg4_version>=3) {
2416         s->coded_block[xy           ] =
2417         s->coded_block[xy + 1       ] =
2418         s->coded_block[xy     + wrap] =
2419         s->coded_block[xy + 1 + wrap] = 0;
2420     }
2421     /* chroma */
2422     wrap = s->block_wrap[4];
2423     xy = s->mb_x + 1 + (s->mb_y + 1) * wrap;
2424     s->dc_val[1][xy] =
2425     s->dc_val[2][xy] = 1024;
2426     /* ac pred */
2427     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
2428     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
2429     
2430     s->mbintra_table[s->mb_x + s->mb_y*s->mb_stride]= 0;
2431 }
2432
2433 /* generic function called after a macroblock has been parsed by the
2434    decoder or after it has been encoded by the encoder.
2435
2436    Important variables used:
2437    s->mb_intra : true if intra macroblock
2438    s->mv_dir   : motion vector direction
2439    s->mv_type  : motion vector type
2440    s->mv       : motion vector
2441    s->interlaced_dct : true if interlaced dct used (mpeg2)
2442  */
2443 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
2444 {
2445     int mb_x, mb_y;
2446     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
2447 #ifdef HAVE_XVMC
2448     if(s->avctx->xvmc_acceleration){
2449         XVMC_decode_mb(s,block);
2450         return;
2451     }
2452 #endif
2453
2454     mb_x = s->mb_x;
2455     mb_y = s->mb_y;
2456
2457     s->current_picture.qscale_table[mb_xy]= s->qscale;
2458
2459     /* update DC predictors for P macroblocks */
2460     if (!s->mb_intra) {
2461         if (s->h263_pred || s->h263_aic) {
2462             if(s->mbintra_table[mb_xy])
2463                 ff_clean_intra_table_entries(s);
2464         } else {
2465             s->last_dc[0] =
2466             s->last_dc[1] =
2467             s->last_dc[2] = 128 << s->intra_dc_precision;
2468         }
2469     }
2470     else if (s->h263_pred || s->h263_aic)
2471         s->mbintra_table[mb_xy]=1;
2472
2473     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
2474         uint8_t *dest_y, *dest_cb, *dest_cr;
2475         int dct_linesize, dct_offset;
2476         op_pixels_func (*op_pix)[4];
2477         qpel_mc_func (*op_qpix)[16];
2478         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2479         const int uvlinesize= s->current_picture.linesize[1];
2480
2481         /* avoid copy if macroblock skipped in last frame too */
2482         /* skip only during decoding as we might trash the buffers during encoding a bit */
2483         if(!s->encoding){
2484             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
2485             const int age= s->current_picture.age;
2486
2487             assert(age);
2488
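            /* mbskip_table counts how many consecutive frames this MB has been
             * skipped; once that count reaches the age of the data sitting in
             * the current buffer, that data is still valid and the copy from
             * the previous frame can be avoided entirely */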
2489             if (s->mb_skiped) {
2490                 s->mb_skiped= 0;
2491                 assert(s->pict_type!=I_TYPE);
2492  
2493                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
2494                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2495
2496                 /* if previous was skipped too, then nothing to do !  */
2497                 if (*mbskip_ptr >= age && s->current_picture.reference){
2498                     return;
2499                 }
2500             } else if(!s->current_picture.reference){
2501                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
2502                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2503             } else{
2504                 *mbskip_ptr = 0; /* not skipped */
2505             }
2506         }
2507
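        /* with interlaced (field) DCT the four luma blocks hold alternating
         * lines of the macroblock: rows within one block are two lines apart
         * and the lower block pair starts just one line below the upper pair */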
2508         if (s->interlaced_dct) {
2509             dct_linesize = linesize * 2;
2510             dct_offset = linesize;
2511         } else {
2512             dct_linesize = linesize;
2513             dct_offset = linesize * 8;
2514         }
2515         
2516         dest_y=  s->dest[0];
2517         dest_cb= s->dest[1];
2518         dest_cr= s->dest[2];
2519
2520         if (!s->mb_intra) {
2521             /* motion handling */
2522             /* decoding or more than one mb_type (MC was already done otherwise) */
2523             if(!s->encoding){
2524                 if ((!s->no_rounding) || s->pict_type==B_TYPE){                
2525                     op_pix = s->dsp.put_pixels_tab;
2526                     op_qpix= s->dsp.put_qpel_pixels_tab;
2527                 }else{
2528                     op_pix = s->dsp.put_no_rnd_pixels_tab;
2529                     op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2530                 }
2531
2532                 if (s->mv_dir & MV_DIR_FORWARD) {
2533                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2534                     op_pix = s->dsp.avg_pixels_tab;
2535                     op_qpix= s->dsp.avg_qpel_pixels_tab;
2536                 }
2537                 if (s->mv_dir & MV_DIR_BACKWARD) {
2538                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2539                 }
2540             }
2541
2542             /* skip dequant / idct if we are really late ;) */
2543             if(s->hurry_up>1) return;
2544
2545             /* add dct residue */
2546             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
2547                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
2548                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize);
2549                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2550                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2551                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2552
2553                 if(!(s->flags&CODEC_FLAG_GRAY)){
2554                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize);
2555                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize);
2556                 }
2557             } else if(s->codec_id != CODEC_ID_WMV2){
2558                 add_dct(s, block[0], 0, dest_y, dct_linesize);
2559                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2560                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2561                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2562
2563                 if(!(s->flags&CODEC_FLAG_GRAY)){
2564                     add_dct(s, block[4], 4, dest_cb, uvlinesize);
2565                     add_dct(s, block[5], 5, dest_cr, uvlinesize);
2566                 }
2567             } 
2568 #ifdef CONFIG_RISKY
2569             else{
2570                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
2571             }
2572 #endif
2573         } else {
2574             /* dct only in intra block */
2575             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
2576                 put_dct(s, block[0], 0, dest_y, dct_linesize);
2577                 put_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2578                 put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2579                 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2580
2581                 if(!(s->flags&CODEC_FLAG_GRAY)){
2582                     put_dct(s, block[4], 4, dest_cb, uvlinesize);
2583                     put_dct(s, block[5], 5, dest_cr, uvlinesize);
2584                 }
2585             }else{
2586                 s->dsp.idct_put(dest_y                 , dct_linesize, block[0]);
2587                 s->dsp.idct_put(dest_y              + 8, dct_linesize, block[1]);
2588                 s->dsp.idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
2589                 s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
2590
2591                 if(!(s->flags&CODEC_FLAG_GRAY)){
2592                     s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
2593                     s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
2594                 }
2595             }
2596         }
2597     }
2598 }
2599
2600 #ifdef CONFIG_ENCODERS
2601
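/* If a block contains only +-1 coefficients and their score (based on the
 * zero-run length before each one, see tab[]) stays below the threshold, the
 * whole block is zeroed; presumably coding such near-empty blocks costs more
 * bits than the quality they contribute. */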
2602 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
2603 {
2604     static const char tab[64]=
2605         {3,2,2,1,1,1,1,1,
2606          1,1,1,1,1,1,1,1,
2607          1,1,1,1,1,1,1,1,
2608          0,0,0,0,0,0,0,0,
2609          0,0,0,0,0,0,0,0,
2610          0,0,0,0,0,0,0,0,
2611          0,0,0,0,0,0,0,0,
2612          0,0,0,0,0,0,0,0};
2613     int score=0;
2614     int run=0;
2615     int i;
2616     DCTELEM *block= s->block[n];
2617     const int last_index= s->block_last_index[n];
2618     int skip_dc;
2619
2620     if(threshold<0){
2621         skip_dc=0;
2622         threshold= -threshold;
2623     }else
2624         skip_dc=1;
2625
2626     /* are all the coefficients which we could set to zero already zero? */
2627     if(last_index<=skip_dc - 1) return;
2628
2629     for(i=0; i<=last_index; i++){
2630         const int j = s->intra_scantable.permutated[i];
2631         const int level = ABS(block[j]);
2632         if(level==1){
2633             if(skip_dc && i==0) continue;
2634             score+= tab[run];
2635             run=0;
2636         }else if(level>1){
2637             return;
2638         }else{
2639             run++;
2640         }
2641     }
2642     if(score >= threshold) return;
2643     for(i=skip_dc; i<=last_index; i++){
2644         const int j = s->intra_scantable.permutated[i];
2645         block[j]=0;
2646     }
2647     if(block[0]) s->block_last_index[n]= 0;
2648     else         s->block_last_index[n]= -1;
2649 }
2650
2651 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
2652 {
2653     int i;
2654     const int maxlevel= s->max_qcoeff;
2655     const int minlevel= s->min_qcoeff;
2656     
2657     if(s->mb_intra){
2658         i=1; //skip clipping of intra dc
2659     }else
2660         i=0;
2661     
2662     for(;i<=last_index; i++){
2663         const int j= s->intra_scantable.permutated[i];
2664         int level = block[j];
2665        
2666         if     (level>maxlevel) level=maxlevel;
2667         else if(level<minlevel) level=minlevel;
2668
2669         block[j]= level;
2670     }
2671 }
2672
2673 #if 0
2674 static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
2675     int score=0;
2676     int x,y;
2677     
2678     for(y=0; y<7; y++){
2679         for(x=0; x<16; x+=4){
2680             score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride]) 
2681                    +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
2682         }
2683         s+= stride;
2684     }
2685     
2686     return score;
2687 }
2688
2689 static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
2690     int score=0;
2691     int x,y;
2692     
2693     for(y=0; y<7; y++){
2694         for(x=0; x<16; x++){
2695             score+= ABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2696         }
2697         s1+= stride;
2698         s2+= stride;
2699     }
2700     
2701     return score;
2702 }
2703 #else
2704 #define SQ(a) ((a)*(a))
2705
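/* Sum of squared differences between vertically adjacent lines of a 16x8
 * area; used further down to decide between frame (progressive) and field
 * (interlaced) DCT when CODEC_FLAG_INTERLACED_DCT is set. */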
2706 static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
2707     int score=0;
2708     int x,y;
2709     
2710     for(y=0; y<7; y++){
2711         for(x=0; x<16; x+=4){
2712             score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) 
2713                    +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
2714         }
2715         s+= stride;
2716     }
2717     
2718     return score;
2719 }
2720
2721 static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
2722     int score=0;
2723     int x,y;
2724     
2725     for(y=0; y<7; y++){
2726         for(x=0; x<16; x++){
2727             score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2728         }
2729         s1+= stride;
2730         s2+= stride;
2731     }
2732     
2733     return score;
2734 }
2735
2736 #endif
2737
2738 #endif //CONFIG_ENCODERS
2739
2740 /**
2741  *
2742  * @param h is the normal height; it will be reduced automatically if needed for the last row
2743  */
2744 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2745     if (s->avctx->draw_horiz_band) {
2746         AVFrame *src;
2747         int offset[4];
2748         
2749         if(s->picture_structure != PICT_FRAME){
2750             h <<= 1;
2751             y <<= 1;
2752             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
2753         }
2754
2755         h= FFMIN(h, s->height - y);
2756
2757         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) 
2758             src= (AVFrame*)s->current_picture_ptr;
2759         else if(s->last_picture_ptr)
2760             src= (AVFrame*)s->last_picture_ptr;
2761         else
2762             return;
2763             
2764         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2765             offset[0]=
2766             offset[1]=
2767             offset[2]=
2768             offset[3]= 0;
2769         }else{
2770             offset[0]= y * s->linesize;
2771             offset[1]= 
2772             offset[2]= (y>>1) * s->uvlinesize;
2773             offset[3]= 0;
2774         }
2775
2776         emms_c();
2777
2778         s->avctx->draw_horiz_band(s->avctx, src, offset,
2779                                   y, s->picture_structure, h);
2780     }
2781 }
2782
2783 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
2784     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2785     const int uvlinesize= s->current_picture.linesize[1];
2786         
2787     s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
2788     s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1)     + s->mb_x*2;
2789     s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1 + s->mb_x*2;
2790     s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2)     + s->mb_x*2;
2791     s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
2792     s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
2793     
2794     if(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME){
2795         s->dest[0] = s->current_picture.data[0] + s->mb_x * 16 - 16;
2796         s->dest[1] = s->current_picture.data[1] + s->mb_x * 8 - 8;
2797         s->dest[2] = s->current_picture.data[2] + s->mb_x * 8 - 8;
2798     }else{
2799         s->dest[0] = s->current_picture.data[0] + (s->mb_y * 16* linesize  ) + s->mb_x * 16 - 16;
2800         s->dest[1] = s->current_picture.data[1] + (s->mb_y * 8 * uvlinesize) + s->mb_x * 8 - 8;
2801         s->dest[2] = s->current_picture.data[2] + (s->mb_y * 8 * uvlinesize) + s->mb_x * 8 - 8;
2802     }    
2803 }
2804
2805 #ifdef CONFIG_ENCODERS
2806
2807 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2808 {
2809     const int mb_x= s->mb_x;
2810     const int mb_y= s->mb_y;
2811     int i;
2812     int skip_dct[6];
2813     int dct_offset   = s->linesize*8; //default for progressive frames
2814     
2815     for(i=0; i<6; i++) skip_dct[i]=0;
2816     
2817     if(s->adaptive_quant){
2818         s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_stride] - s->qscale;
2819
2820         if(s->out_format==FMT_H263){
2821             if     (s->dquant> 2) s->dquant= 2;
2822             else if(s->dquant<-2) s->dquant=-2;
2823         }
2824             
2825         if(s->codec_id==CODEC_ID_MPEG4){        
2826             if(!s->mb_intra){
2827                 if(s->mv_dir&MV_DIRECT)
2828                     s->dquant=0;
2829
2830                 assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
2831             }
2832         }
2833         s->qscale+= s->dquant;
2834         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
2835         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
2836     }
2837
2838     if (s->mb_intra) {
2839         uint8_t *ptr;
2840         int wrap_y;
2841         int emu=0;
2842
2843         wrap_y = s->linesize;
2844         ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2845
2846         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2847             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2848             ptr= s->edge_emu_buffer;
2849             emu=1;
2850         }
2851         
2852         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2853             int progressive_score, interlaced_score;
2854             
2855             progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
2856             interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
2857             
2858             if(progressive_score > interlaced_score + 100){
2859                 s->interlaced_dct=1;
2860             
2861                 dct_offset= wrap_y;
2862                 wrap_y<<=1;
2863             }else
2864                 s->interlaced_dct=0;
2865         }
2866         
2867         s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
2868         s->dsp.get_pixels(s->block[1], ptr              + 8, wrap_y);
2869         s->dsp.get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
2870         s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
2871
2872         if(s->flags&CODEC_FLAG_GRAY){
2873             skip_dct[4]= 1;
2874             skip_dct[5]= 1;
2875         }else{
2876             int wrap_c = s->uvlinesize;
2877             ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2878             if(emu){
2879                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2880                 ptr= s->edge_emu_buffer;
2881             }
2882             s->dsp.get_pixels(s->block[4], ptr, wrap_c);
2883
2884             ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2885             if(emu){
2886                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2887                 ptr= s->edge_emu_buffer;
2888             }
2889             s->dsp.get_pixels(s->block[5], ptr, wrap_c);
2890         }
2891     }else{
2892         op_pixels_func (*op_pix)[4];
2893         qpel_mc_func (*op_qpix)[16];
2894         uint8_t *dest_y, *dest_cb, *dest_cr;
2895         uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2896         int wrap_y, wrap_c;
2897         int emu=0;
2898
2899         dest_y  = s->dest[0];
2900         dest_cb = s->dest[1];
2901         dest_cr = s->dest[2];
2902         wrap_y = s->linesize;
2903         wrap_c = s->uvlinesize;
2904         ptr_y  = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2905         ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2906         ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2907
2908         if ((!s->no_rounding) || s->pict_type==B_TYPE){
2909             op_pix = s->dsp.put_pixels_tab;
2910             op_qpix= s->dsp.put_qpel_pixels_tab;
2911         }else{
2912             op_pix = s->dsp.put_no_rnd_pixels_tab;
2913             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2914         }
2915
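             /* motion compensation into s->dest[]: the forward prediction is written
                with put (or put_no_rnd) pixel functions, and if a backward reference
                is also used its prediction is averaged on top, yielding the
                bidirectional prediction that the residual below is computed against. */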
2916         if (s->mv_dir & MV_DIR_FORWARD) {
2917             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2918             op_pix = s->dsp.avg_pixels_tab;
2919             op_qpix= s->dsp.avg_qpel_pixels_tab;
2920         }
2921         if (s->mv_dir & MV_DIR_BACKWARD) {
2922             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2923         }
2924
2925         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2926             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2927             ptr_y= s->edge_emu_buffer;
2928             emu=1;
2929         }
2930         
2931         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2932             int progressive_score, interlaced_score;
2933             
2934             progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  ) 
2935                              + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
2936             interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
2937                              + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
2938             
2939             if(progressive_score > interlaced_score + 600){
2940                 s->interlaced_dct=1;
2941             
2942                 dct_offset= wrap_y;
2943                 wrap_y<<=1;
2944             }else
2945                 s->interlaced_dct=0;
2946         }
2947         
2948         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
2949         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
2950         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
2951         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
2952         
2953         if(s->flags&CODEC_FLAG_GRAY){
2954             skip_dct[4]= 1;
2955             skip_dct[5]= 1;
2956         }else{
2957             if(emu){
2958                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2959                 ptr_cb= s->edge_emu_buffer;
2960             }
2961             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2962             if(emu){
2963                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2964                 ptr_cr= s->edge_emu_buffer;
2965             }
2966             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2967         }
2968         /* pre quantization */         
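             /* if the motion compensated variance of this MB is already small, run a
                cheap SAD test per 8x8 block: blocks whose residual SAD stays below
                20*qscale are marked in skip_dct[] and get no DCT / quantization at
                all (their block_last_index becomes -1 further down). */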
2969         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
2970             //FIXME optimize
2971             if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
2972             if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
2973             if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
2974             if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
2975             if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_c) < 20*s->qscale) skip_dct[4]= 1;
2976             if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_c) < 20*s->qscale) skip_dct[5]= 1;
2977 #if 0
2978 {
2979  static int stat[7];
2980  int num=0;
2981  for(i=0; i<6; i++)
2982   if(skip_dct[i]) num++;
2983  stat[num]++;
2984  
2985  if(s->mb_x==0 && s->mb_y==0){
2986   for(i=0; i<7; i++){
2987    printf("%6d %1d\n", stat[i], i);
2988   }
2989  }
2990 }
2991 #endif
2992         }
2993
2994     }
2995             
2996 #if 0
2997             {
2998                 float adap_parm;
2999                 
3000                 adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_stride*mb_y+mb_x] + 1.0) /
3001                             ((s->mb_var[s->mb_stride*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
3002             
3003                 printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", 
3004                         (s->mb_type[s->mb_stride*mb_y+mb_x] > 0) ? 'I' : 'P', 
3005                         s->qscale, adap_parm, s->qscale*adap_parm,
3006                         s->mb_var[s->mb_stride*mb_y+mb_x], s->avg_mb_var);
3007             }
3008 #endif
3009     /* DCT & quantize */
3010     if(s->out_format==FMT_MJPEG){
3011         for(i=0;i<6;i++) {
3012             int overflow;
3013             s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, 8, &overflow);
3014             if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
3015         }
3016     }else{
3017         for(i=0;i<6;i++) {
3018             if(!skip_dct[i]){
3019                 int overflow;
3020                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
3021             // FIXME we could decide to change the quantizer instead of clipping
3022             // JS: I don't think that would be a good idea, it could lower quality instead
3023             //     of improving it. Just INTRADC clipping deserves changes in the quantizer
3024                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
3025             }else
3026                 s->block_last_index[i]= -1;
3027         }
3028         if(s->flags & CODEC_FLAG_CBP_RD){
3029             for(i=0;i<6;i++) {
3030                 if(s->block_last_index[i] == -1)
3031                     s->coded_score[i]= INT_MAX/256;
3032             }
3033         }
3034         
3035         if(s->luma_elim_threshold && !s->mb_intra)
3036             for(i=0; i<4; i++)
3037                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
3038         if(s->chroma_elim_threshold && !s->mb_intra)
3039             for(i=4; i<6; i++)
3040                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
3041     }
3042
3043     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
3044         s->block_last_index[4]=
3045         s->block_last_index[5]= 0;
3046         s->block[4][0]=
3047         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
3048     }
3049
3050     /* huffman encode */
3051     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
3052     case CODEC_ID_MPEG1VIDEO:
3053     case CODEC_ID_MPEG2VIDEO:
3054         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
3055 #ifdef CONFIG_RISKY
3056     case CODEC_ID_MPEG4:
3057         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3058     case CODEC_ID_MSMPEG4V2:
3059     case CODEC_ID_MSMPEG4V3:
3060     case CODEC_ID_WMV1:
3061         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3062     case CODEC_ID_WMV2:
3063          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
3064     case CODEC_ID_H263:
3065     case CODEC_ID_H263P:
3066     case CODEC_ID_FLV1:
3067     case CODEC_ID_RV10:
3068         h263_encode_mb(s, s->block, motion_x, motion_y); break;
3069 #endif
3070     case CODEC_ID_MJPEG:
3071         mjpeg_encode_mb(s, s->block); break;
3072     default:
3073         assert(0);
3074     }
3075 }
3076
3077 #endif //CONFIG_ENCODERS
3078
3079 /**
3080  * combines the (truncated) bitstream into a complete frame
3081  * @returns -1 if no complete frame could be created
3082  */
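     /* A rough usage sketch (hypothetical caller, not code from this file): a decoder
      * that accepts truncated input first scans the new data for the start of the
      * following frame and then lets ff_combine_frame() buffer or assemble it, e.g.
      *
      *     int next = find_frame_end(&s->parse_context, buf, buf_size); // hypothetical scan helper
      *     if (ff_combine_frame(s, next, &buf, &buf_size) < 0)
      *         return buf_size;      // no complete frame yet, keep buffering
      *     // buf / buf_size now describe exactly one complete frame
      *
      * here next is the offset of the first byte of the following frame within buf,
      * or END_NOT_FOUND if no frame boundary was seen in this chunk.
      */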
3083 int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size){
3084     ParseContext *pc= &s->parse_context;
3085
3086 #if 0
3087     if(pc->overread){
3088         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
3089         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
3090     }
3091 #endif
3092
3093     /* copy overread bytes from the last frame into the buffer */
3094     for(; pc->overread>0; pc->overread--){
3095         pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
3096     }
3097     
3098     pc->last_index= pc->index;
3099
3100     /* copy into buffer and return */
3101     if(next == END_NOT_FOUND){
3102         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
3103
3104         memcpy(&pc->buffer[pc->index], *buf, *buf_size);
3105         pc->index += *buf_size;
3106         return -1;
3107     }
3108
3109     *buf_size=
3110     pc->overread_index= pc->index + next;
3111     
3112     /* append to buffer */
3113     if(pc->index){
3114         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
3115
3116         memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
3117         pc->index = 0;
3118         *buf= pc->buffer;
3119     }
3120
3121     /* store overread bytes */
3122     for(;next < 0; next++){
3123         pc->state = (pc->state<<8) | pc->buffer[pc->last_index + next];
3124         pc->overread++;
3125     }
3126
3127 #if 0
3128     if(pc->overread){
3129         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
3130         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
3131     }
3132 #endif
3133
3134     return 0;
3135 }
3136
3137 void ff_mpeg_flush(AVCodecContext *avctx){
3138     int i;
3139     MpegEncContext *s = avctx->priv_data;
3140     
3141     for(i=0; i<MAX_PICTURE_COUNT; i++){
3142        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
3143                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
3144         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
3145     }
3146     s->last_picture_ptr = s->next_picture_ptr = NULL;
3147     
3148     s->parse_context.state= -1;
3149     s->parse_context.frame_start_found= 0;
3150     s->parse_context.overread= 0;
3151     s->parse_context.overread_index= 0;
3152     s->parse_context.index= 0;
3153     s->parse_context.last_index= 0;
3154 }
3155
3156 #ifdef CONFIG_ENCODERS
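     /* ff_copy_bits(): append 'length' bits from the byte buffer src (MSB first, as
        produced by flushing another PutBitContext) to pb, 16 bits at a time plus a
        final partial chunk. */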
3157 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
3158 {
3159     int bytes= length>>4;
3160     int bits= length&15;
3161     int i;
3162
3163     if(length==0) return;
3164
3165     for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i]));
3166     put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits));
3167 }
3168
3169 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
3170     int i;
3171
3172     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
3173
3174     /* mpeg1 */
3175     d->mb_skip_run= s->mb_skip_run;
3176     for(i=0; i<3; i++)
3177         d->last_dc[i]= s->last_dc[i];
3178     
3179     /* statistics */
3180     d->mv_bits= s->mv_bits;
3181     d->i_tex_bits= s->i_tex_bits;
3182     d->p_tex_bits= s->p_tex_bits;
3183     d->i_count= s->i_count;
3184     d->f_count= s->f_count;
3185     d->b_count= s->b_count;
3186     d->skip_count= s->skip_count;
3187     d->misc_bits= s->misc_bits;
3188     d->last_bits= 0;
3189
3190     d->mb_skiped= 0;
3191     d->qscale= s->qscale;
3192 }
3193
3194 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
3195     int i;
3196
3197     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
3198     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
3199     
3200     /* mpeg1 */
3201     d->mb_skip_run= s->mb_skip_run;
3202     for(i=0; i<3; i++)
3203         d->last_dc[i]= s->last_dc[i];
3204     
3205     /* statistics */
3206     d->mv_bits= s->mv_bits;
3207     d->i_tex_bits= s->i_tex_bits;
3208     d->p_tex_bits= s->p_tex_bits;
3209     d->i_count= s->i_count;
3210     d->f_count= s->f_count;
3211     d->b_count= s->b_count;
3212     d->skip_count= s->skip_count;
3213     d->misc_bits= s->misc_bits;
3214
3215     d->mb_intra= s->mb_intra;
3216     d->mb_skiped= s->mb_skiped;
3217     d->mv_type= s->mv_type;
3218     d->mv_dir= s->mv_dir;
3219     d->pb= s->pb;
3220     if(s->data_partitioning){
3221         d->pb2= s->pb2;
3222         d->tex_pb= s->tex_pb;
3223     }
3224     d->block= s->block;
3225     for(i=0; i<6; i++)
3226         d->block_last_index[i]= s->block_last_index[i];
3227     d->interlaced_dct= s->interlaced_dct;
3228     d->qscale= s->qscale;
3229 }
3230
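     /* encode_mb_hq(): trial-encode the current MB with one candidate prediction
        type.  The context is restored from 'backup', the bits go into one of two
        alternating bit buffers (and, for the second buffer, into a scratch
        destination), and the resulting cost is compared against *dmin.  With
        mb_decision == FF_MB_DECISION_RD the cost is a rate-distortion score,
        bits * qscale^2 * 109 + SSE * 128, i.e. distortion plus lambda*rate with
        lambda ~= 0.85 * qscale^2.  The cheapest candidate's context is copied
        into 'best'. */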
3231 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
3232                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
3233                            int *dmin, int *next_block, int motion_x, int motion_y)
3234 {
3235     int score;
3236     uint8_t *dest_backup[3];
3237     
3238     copy_context_before_encode(s, backup, type);
3239
3240     s->block= s->blocks[*next_block];
3241     s->pb= pb[*next_block];
3242     if(s->data_partitioning){
3243         s->pb2   = pb2   [*next_block];
3244         s->tex_pb= tex_pb[*next_block];
3245     }
3246     
3247     if(*next_block){
3248         memcpy(dest_backup, s->dest, sizeof(s->dest));
3249         s->dest[0] = s->me.scratchpad;
3250         s->dest[1] = s->me.scratchpad + 16;
3251         s->dest[2] = s->me.scratchpad + 16 + 8;
3252         assert(2*s->uvlinesize == s->linesize); //should be no prob for encoding
3253         assert(s->linesize >= 64); //FIXME
3254     }
3255
3256     encode_mb(s, motion_x, motion_y);
3257     
3258     score= get_bit_count(&s->pb);
3259     if(s->data_partitioning){
3260         score+= get_bit_count(&s->pb2);
3261         score+= get_bit_count(&s->tex_pb);
3262     }
3263    
3264     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
3265         MPV_decode_mb(s, s->block);
3266
3267         score *= s->qscale * s->qscale * 109;
3268         score += sse_mb(s) << 7;
3269     }
3270     
3271     if(*next_block){
3272         memcpy(s->dest, dest_backup, sizeof(s->dest));
3273     }
3274
3275     if(score<*dmin){
3276         *dmin= score;
3277         *next_block^=1;
3278
3279         copy_context_after_encode(best, s, type);
3280     }
3281 }
3282                 
3283 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
3284     uint32_t *sq = squareTbl + 256;
3285     int acc=0;
3286     int x,y;
3287     
3288     if(w==16 && h==16) 
3289         return s->dsp.sse[0](NULL, src1, src2, stride);
3290     else if(w==8 && h==8)
3291         return s->dsp.sse[1](NULL, src1, src2, stride);
3292     
3293     for(y=0; y<h; y++){
3294         for(x=0; x<w; x++){
3295             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
3296         } 
3297     }
3298     
3299     assert(acc>=0);
3300     
3301     return acc;
3302 }
3303
3304 static int sse_mb(MpegEncContext *s){
3305     int w= 16;
3306     int h= 16;
3307
3308     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3309     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3310
3311     if(w==16 && h==16)
3312         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize)
3313                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize)
3314                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize);
3315     else
3316         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
3317                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
3318                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
3319 }
3320
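     /* encode_picture(): top level per-frame encoding.  Roughly:
          1. motion estimation for every MB (with an optional pre-pass), or spatial
             complexity measurement for I frames,
          2. scene change detection (may turn a P frame into an I frame),
          3. f_code / b_code selection and clamping of overly long motion vectors,
          4. rate control (frame_qscale) and optional per-MB adaptive quantization,
          5. picture / slice headers, then the MB loop, which either trial-encodes
             all candidate MB types (encode_mb_hq) or encodes the single chosen type,
             reconstructing each MB for use as a reference,
          6. partition merging, stuffing and the final RTP GOB callback. */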
3321 static void encode_picture(MpegEncContext *s, int picture_number)
3322 {
3323     int mb_x, mb_y, pdif = 0;
3324     int i;
3325     int bits;
3326     MpegEncContext best_s, backup_s;
3327     uint8_t bit_buf[2][3000];
3328     uint8_t bit_buf2[2][3000];
3329     uint8_t bit_buf_tex[2][3000];
3330     PutBitContext pb[2], pb2[2], tex_pb[2];
3331
3332     for(i=0; i<2; i++){
3333         init_put_bits(&pb    [i], bit_buf    [i], 3000, NULL, NULL);
3334         init_put_bits(&pb2   [i], bit_buf2   [i], 3000, NULL, NULL);
3335         init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000, NULL, NULL);
3336     }
3337
3338     s->picture_number = picture_number;
3339     
3340     /* Reset the average MB variance */
3341     s->current_picture.mb_var_sum = 0;
3342     s->current_picture.mc_mb_var_sum = 0;
3343
3344 #ifdef CONFIG_RISKY
3345     /* we need to initialize some time vars before we can encode b-frames */
3346     // RAL: Condition added for MPEG1VIDEO
3347     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
3348         ff_set_mpeg4_time(s, s->picture_number); 
3349 #endif
3350         
3351     s->scene_change_score=0;
3352     
3353     s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME rate distortion
3354     
3355     if(s->pict_type==I_TYPE){
3356         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3357         else                        s->no_rounding=0;
3358     }else if(s->pict_type!=B_TYPE){
3359         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3360             s->no_rounding ^= 1;          
3361     }
3362     
3363     /* Estimate motion for every MB */
3364     s->mb_intra=0; //for the rate distortion & bit compare functions
3365     if(s->pict_type != I_TYPE){
3366         if(s->pict_type != B_TYPE){
3367             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
3368                 s->me.pre_pass=1;
3369                 s->me.dia_size= s->avctx->pre_dia_size;
3370
3371                 for(mb_y=s->mb_height-1; mb_y >=0 ; mb_y--) {
3372                     for(mb_x=s->mb_width-1; mb_x >=0 ; mb_x--) {
3373                         s->mb_x = mb_x;
3374                         s->mb_y = mb_y;
3375                         ff_pre_estimate_p_frame_motion(s, mb_x, mb_y);
3376                     }
3377                 }
3378                 s->me.pre_pass=0;
3379             }
3380         }
3381
3382         s->me.dia_size= s->avctx->dia_size;
3383         for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3384             s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
3385             s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
3386             s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
3387             s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
3388             for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3389                 s->mb_x = mb_x;
3390                 s->mb_y = mb_y;
3391                 s->block_index[0]+=2;
3392                 s->block_index[1]+=2;
3393                 s->block_index[2]+=2;
3394                 s->block_index[3]+=2;
3395                 
3396                 /* compute motion vector & mb_type and store in context */
3397                 if(s->pict_type==B_TYPE)
3398                     ff_estimate_b_frame_motion(s, mb_x, mb_y);
3399                 else
3400                     ff_estimate_p_frame_motion(s, mb_x, mb_y);
3401             }
3402         }
3403     }else /* if(s->pict_type == I_TYPE) */{
3404         /* I-Frame */
3405         //FIXME do we need to zero them?
3406         memset(s->motion_val[0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
3407         memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2);
3408         memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3409         
3410         if(!s->fixed_qscale){
3411             /* finding spatial complexity for I-frame rate control */
3412             for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3413                 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3414                     int xx = mb_x * 16;
3415                     int yy = mb_y * 16;
3416                     uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
3417                     int varc;
3418                     int sum = s->dsp.pix_sum(pix, s->linesize);
3419     
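                         /* rough per-MB variance for I-frame rate control:
                            sum of squares minus (sum^2)/256, scaled down by 256
                            (+500 bias, +128 for rounding) */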
3420                     varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
3421
3422                     s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
3423                     s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
3424                     s->current_picture.mb_var_sum    += varc;
3425                 }
3426             }
3427         }
3428     }
3429     emms_c();
3430
3431     if(s->scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
3432         s->pict_type= I_TYPE;
3433         memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3434 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3435     }
3436
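         /* pick the smallest f_code (and b_code for B frames) whose motion vector
            range still covers the estimated vectors, then clamp or round vectors
            that remain outside the codable range. */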
3437     if(!s->umvplus){
3438         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
3439             s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
3440         
3441             ff_fix_long_p_mvs(s);
3442         }
3443
3444         if(s->pict_type==B_TYPE){
3445             int a, b;
3446
3447             a = ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
3448             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, MB_TYPE_BIDIR);
3449             s->f_code = FFMAX(a, b);
3450
3451             a = ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
3452             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, MB_TYPE_BIDIR);
3453             s->b_code = FFMAX(a, b);
3454
3455             ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
3456             ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
3457             ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
3458             ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
3459         }
3460     }
3461     
3462     if (s->fixed_qscale) 
3463         s->frame_qscale = s->current_picture.quality;
3464     else
3465         s->frame_qscale = ff_rate_estimate_qscale(s);
3466
3467     if(s->adaptive_quant){
3468 #ifdef CONFIG_RISKY
3469         switch(s->codec_id){
3470         case CODEC_ID_MPEG4:
3471             ff_clean_mpeg4_qscales(s);
3472             break;
3473         case CODEC_ID_H263:
3474         case CODEC_ID_H263P:
3475         case CODEC_ID_FLV1:
3476             ff_clean_h263_qscales(s);
3477             break;
3478         }
3479 #endif
3480
3481         s->qscale= s->current_picture.qscale_table[0];
3482     }else
3483         s->qscale= (int)(s->frame_qscale + 0.5);
3484         
3485     if (s->out_format == FMT_MJPEG) {
3486         /* for mjpeg, we do include qscale in the matrix */
3487         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
3488         for(i=1;i<64;i++){
3489             int j= s->dsp.idct_permutation[i];
3490
3491             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3492         }
3493         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, 
3494                        s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8);
3495     }
3496     
3497     //FIXME var duplication
3498     s->current_picture.key_frame= s->pict_type == I_TYPE;
3499     s->current_picture.pict_type= s->pict_type;
3500
3501     if(s->current_picture.key_frame)
3502         s->picture_in_gop_number=0;
3503
3504     s->last_bits= get_bit_count(&s->pb);
3505     switch(s->out_format) {
3506     case FMT_MJPEG:
3507         mjpeg_picture_header(s);
3508         break;
3509 #ifdef CONFIG_RISKY
3510     case FMT_H263:
3511         if (s->codec_id == CODEC_ID_WMV2) 
3512             ff_wmv2_encode_picture_header(s, picture_number);
3513         else if (s->h263_msmpeg4) 
3514             msmpeg4_encode_picture_header(s, picture_number);
3515         else if (s->h263_pred)
3516             mpeg4_encode_picture_header(s, picture_number);
3517         else if (s->h263_rv10) 
3518             rv10_encode_picture_header(s, picture_number);
3519         else if (s->codec_id == CODEC_ID_FLV1)
3520             ff_flv_encode_picture_header(s, picture_number);
3521         else
3522             h263_encode_picture_header(s, picture_number);
3523         break;
3524 #endif
3525     case FMT_MPEG1:
3526         mpeg1_encode_picture_header(s, picture_number);
3527         break;
3528     case FMT_H264:
3529         break;
3530     }
3531     bits= get_bit_count(&s->pb);
3532     s->header_bits= bits - s->last_bits;
3533     s->last_bits= bits;
3534     s->mv_bits=0;
3535     s->misc_bits=0;
3536     s->i_tex_bits=0;
3537     s->p_tex_bits=0;
3538     s->i_count=0;
3539     s->f_count=0;
3540     s->b_count=0;
3541     s->skip_count=0;
3542
3543     for(i=0; i<3; i++){
3544         /* init last dc values */
3545         /* note: quant matrix value (8) is implied here */
3546         s->last_dc[i] = 128;
3547         
3548         s->current_picture_ptr->error[i] = 0;
3549     }
3550     s->mb_skip_run = 0;
3551     s->last_mv[0][0][0] = 0;
3552     s->last_mv[0][0][1] = 0;
3553     s->last_mv[1][0][0] = 0;
3554     s->last_mv[1][0][1] = 0;
3555      
3556     s->last_mv_dir = 0;
3557
3558 #ifdef CONFIG_RISKY
3559     switch(s->codec_id){
3560     case CODEC_ID_H263:
3561     case CODEC_ID_H263P:
3562     case CODEC_ID_FLV1:
3563         s->gob_index = ff_h263_get_gob_height(s);
3564         break;
3565     case CODEC_ID_MPEG4:
3566         if(s->partitioned_frame)
3567             ff_mpeg4_init_partitions(s);
3568         break;
3569     }
3570 #endif
3571
3572     s->resync_mb_x=0;
3573     s->resync_mb_y=0;
3574     s->first_slice_line = 1;
3575     s->ptr_lastgob = s->pb.buf;
3576     for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3577         s->mb_x=0;
3578         s->mb_y= mb_y;
3579
3580         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
3581         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
3582         ff_init_block_index(s);
3583         
3584         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3585             const int xy= mb_y*s->mb_stride + mb_x;
3586             int mb_type= s->mb_type[xy];
3587 //            int d;
3588             int dmin= INT_MAX;
3589
3590             s->mb_x = mb_x;
3591             ff_update_block_index(s);
3592
3593             /* write gob / video packet header  */
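                 /* once the bits written since the last resync point exceed
                    rtp_payload_size, a resync header is emitted: an MPEG-4 video
                    packet header (merging and restarting data partitions as needed),
                    an MPEG-1/2 slice header, or an H.263 GOB header at a GOB
                    boundary; the resync_mb_x/y and first_slice_line bookkeeping is
                    then reset from this MB onwards. */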
3594 #ifdef CONFIG_RISKY
3595             if(s->rtp_mode && mb_y + mb_x>0){
3596                 int current_packet_size, is_gob_start;
3597                 
3598                 current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
3599                 is_gob_start=0;
3600                 
3601                 if(s->codec_id==CODEC_ID_MPEG4){
3602                     if(current_packet_size >= s->rtp_payload_size){
3603
3604                         if(s->partitioned_frame){
3605                             ff_mpeg4_merge_partitions(s);
3606                             ff_mpeg4_init_partitions(s);
3607                         }
3608                         ff_mpeg4_encode_video_packet_header(s);
3609
3610                         if(s->flags&CODEC_FLAG_PASS1){
3611                             int bits= get_bit_count(&s->pb);
3612                             s->misc_bits+= bits - s->last_bits;
3613                             s->last_bits= bits;
3614                         }
3615                         ff_mpeg4_clean_buffers(s);
3616                         is_gob_start=1;
3617                     }
3618                 }else if(s->codec_id==CODEC_ID_MPEG1VIDEO){
3619                     if(   current_packet_size >= s->rtp_payload_size 
3620                        && s->mb_skip_run==0){
3621                         ff_mpeg1_encode_slice_header(s);
3622                         ff_mpeg1_clean_buffers(s);
3623                         is_gob_start=1;
3624                     }
3625                 }else if(s->codec_id==CODEC_ID_MPEG2VIDEO){
3626                     if(   (   current_packet_size >= s->rtp_payload_size || mb_x==0)
3627                        && s->mb_skip_run==0){
3628                         ff_mpeg1_encode_slice_header(s);
3629                         ff_mpeg1_clean_buffers(s);
3630                         is_gob_start=1;
3631                     }
3632                 }else{
3633                     if(current_packet_size >= s->rtp_payload_size
3634                        && s->mb_x==0 && s->mb_y%s->gob_index==0){
3635                        
3636                         h263_encode_gob_header(s, mb_y);                       
3637                         is_gob_start=1;
3638                     }
3639                 }
3640
3641                 if(is_gob_start){
3642                     s->ptr_lastgob = pbBufPtr(&s->pb);
3643                     s->first_slice_line=1;
3644                     s->resync_mb_x=mb_x;
3645                     s->resync_mb_y=mb_y;
3646                 }
3647             }
3648 #endif
3649
3650             if(  (s->resync_mb_x   == s->mb_x)
3651                && s->resync_mb_y+1 == s->mb_y){
3652                 s->first_slice_line=0; 
3653             }
3654
3655             s->mb_skiped=0;
3656
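                 /* mb_type is a bitmask of candidate macroblock types produced by
                    motion estimation; mb_type & (mb_type-1) is non-zero exactly when
                    more than one bit is set, i.e. when several candidates have to be
                    trial-encoded and compared. */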
3657             if(mb_type & (mb_type-1)){ // more than 1 MB type possible
3658                 int next_block=0;
3659                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3660
3661                 copy_context_before_encode(&backup_s, s, -1);
3662                 backup_s.pb= s->pb;
3663                 best_s.data_partitioning= s->data_partitioning;
3664                 best_s.partitioned_frame= s->partitioned_frame;
3665                 if(s->data_partitioning){
3666                     backup_s.pb2= s->pb2;
3667                     backup_s.tex_pb= s->tex_pb;
3668                 }
3669
3670                 if(mb_type&MB_TYPE_INTER){
3671                     s->mv_dir = MV_DIR_FORWARD;
3672                     s->mv_type = MV_TYPE_16X16;
3673                     s->mb_intra= 0;
3674                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3675                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3676                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb, 
3677                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3678                 }
3679                 if(mb_type&MB_TYPE_INTER4V){                 
3680                     s->mv_dir = MV_DIR_FORWARD;
3681                     s->mv_type = MV_TYPE_8X8;
3682                     s->mb_intra= 0;
3683                     for(i=0; i<4; i++){
3684                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3685                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3686                     }
3687                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, 
3688                                  &dmin, &next_block, 0, 0);
3689                 }
3690                 if(mb_type&MB_TYPE_FORWARD){
3691                     s->mv_dir = MV_DIR_FORWARD;
3692                     s->mv_type = MV_TYPE_16X16;
3693                     s->mb_intra= 0;
3694                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3695                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3696                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, 
3697                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3698                 }
3699                 if(mb_type&MB_TYPE_BACKWARD){
3700                     s->mv_dir = MV_DIR_BACKWARD;
3701                     s->mv_type = MV_TYPE_16X16;
3702                     s->mb_intra= 0;
3703                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3704                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3705                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
3706                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3707                 }
3708                 if(mb_type&MB_TYPE_BIDIR){
3709                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3710                     s->mv_type = MV_TYPE_16X16;
3711                     s->mb_intra= 0;
3712                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3713                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3714                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3715                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3716                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, 
3717                                  &dmin, &next_block, 0, 0);
3718                 }
3719                 if(mb_type&MB_TYPE_DIRECT){
3720                     int mx= s->b_direct_mv_table[xy][0];
3721                     int my= s->b_direct_mv_table[xy][1];
3722                     
3723                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3724                     s->mb_intra= 0;
3725 #ifdef CONFIG_RISKY
3726                     ff_mpeg4_set_direct_mv(s, mx, my);
3727 #endif
3728                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, 
3729                                  &dmin, &next_block, mx, my);
3730                 }
3731                 if(mb_type&MB_TYPE_INTRA){
3732                     s->mv_dir = 0;
3733                     s->mv_type = MV_TYPE_16X16;
3734                     s->mb_intra= 1;
3735                     s->mv[0][0][0] = 0;
3736                     s->mv[0][0][1] = 0;
3737                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, 
3738                                  &dmin, &next_block, 0, 0);
3739                     if(s->h263_pred || s->h263_aic){
3740                         if(best_s.mb_intra)
3741                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3742                         else
3743                             ff_clean_intra_table_entries(s); //old mode?
3744                     }
3745                 }
3746                 copy_context_after_encode(s, &best_s, -1);
3747                 
3748                 pb_bits_count= get_bit_count(&s->pb);
3749                 flush_put_bits(&s->pb);
3750                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3751                 s->pb= backup_s.pb;
3752                 
3753                 if(s->data_partitioning){
3754                     pb2_bits_count= get_bit_count(&s->pb2);
3755                     flush_put_bits(&s->pb2);
3756                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3757                     s->pb2= backup_s.pb2;
3758                     
3759                     tex_pb_bits_count= get_bit_count(&s->tex_pb);
3760                     flush_put_bits(&s->tex_pb);
3761                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3762                     s->tex_pb= backup_s.tex_pb;
3763                 }
3764                 s->last_bits= get_bit_count(&s->pb);
3765                
3766 #ifdef CONFIG_RISKY
3767                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
3768                     ff_h263_update_motion_val(s);
3769 #endif
3770         
3771                 if(next_block==0){
3772                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->me.scratchpad     , s->linesize  ,16);
3773                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->me.scratchpad + 16, s->uvlinesize, 8);
3774                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->me.scratchpad + 24, s->uvlinesize, 8);
3775                 }
3776
3777                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3778                     MPV_decode_mb(s, s->block);
3779             } else {
3780                 int motion_x, motion_y;
3781                 int intra_score;
3782                 int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_stride];
3783                 
3784               if(s->avctx->mb_decision==FF_MB_DECISION_SIMPLE && s->pict_type==P_TYPE){ //FIXME check if the mess is useful at all
3785                 /* get luma score */
3786                 if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
3787                     intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_stride]<<8) - 500; //FIXME don't scale it down so we don't have to fix it
3788                 }else{
3789                     uint8_t *dest_y;
3790
3791                     int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_stride]; //FIXME
3792                     mean*= 0x01010101;
3793                     
3794                     dest_y  = s->new_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
3795                 
3796                     for(i=0; i<16; i++){
3797                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean;
3798                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
3799                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
3800                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
3801                     }
3802
3803                     s->mb_intra=1;
3804                     intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize);
3805                                         
3806 /*                    printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8, 
3807                         s->current_picture.mb_var[mb_x + mb_y*s->mb_stride],
3808                         s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_stride]);*/
3809                 }
3810                 
3811                 /* get chroma score */
3812                 if(s->avctx->mb_cmp&FF_CMP_CHROMA){
3813                     int i;
3814                     
3815                     s->mb_intra=1;
3816                     for(i=1; i<3; i++){
3817                         uint8_t *dest_c;
3818                         int mean;
3819                         
3820                         if(s->out_format == FMT_H263){
3821                             mean= (s->dc_val[i][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;)
3822                         }else{
3823                             mean= (s->last_dc[i] + 4)>>3;
3824                         }
3825                         dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
3826                         
3827                         mean*= 0x01010101;
3828                         for(i=0; i<8; i++){
3829                             *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean;
3830                             *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean;
3831                         }
3832                         
3833                         intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize);
3834                     }                
3835                 }
3836
3837                 /* bias */
3838                 switch(s->avctx->mb_cmp&0xFF){
3839                 default:
3840                 case FF_CMP_SAD:
3841                     intra_score+= 32*s->qscale;
3842                     break;
3843                 case FF_CMP_SSE:
3844                     intra_score+= 24*s->qscale*s->qscale;
3845                     break;
3846                 case FF_CMP_SATD:
3847                     intra_score+= 96*s->qscale;
3848                     break;
3849                 case FF_CMP_DCT:
3850                     intra_score+= 48*s->qscale;
3851                     break;
3852                 case FF_CMP_BIT:
3853                     intra_score+= 16;
3854                     break;
3855                 case FF_CMP_PSNR:
3856                 case FF_CMP_RD:
3857                     intra_score+= (s->qscale*s->qscale*109*8 + 64)>>7;
3858                     break;
3859                 }
3860
3861                 if(intra_score < inter_score)
3862                     mb_type= MB_TYPE_INTRA;
3863               }  
3864                 
3865                 s->mv_type=MV_TYPE_16X16;
3866                 // only one MB-Type possible
3867                 
3868                 switch(mb_type){
3869                 case MB_TYPE_INTRA:
3870                     s->mv_dir = 0;
3871                     s->mb_intra= 1;
3872                     motion_x= s->mv[0][0][0] = 0;
3873                     motion_y= s->mv[0][0][1] = 0;
3874                     break;
3875                 case MB_TYPE_INTER:
3876                     s->mv_dir = MV_DIR_FORWARD;
3877                     s->mb_intra= 0;
3878                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3879                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3880                     break;
3881                 case MB_TYPE_INTER4V:
3882                     s->mv_dir = MV_DIR_FORWARD;
3883                     s->mv_type = MV_TYPE_8X8;
3884                     s->mb_intra= 0;
3885                     for(i=0; i<4; i++){
3886                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3887                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3888                     }
3889                     motion_x= motion_y= 0;
3890                     break;
3891                 case MB_TYPE_DIRECT:
3892                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3893                     s->mb_intra= 0;
3894                     motion_x=s->b_direct_mv_table[xy][0];
3895                     motion_y=s->b_direct_mv_table[xy][1];
3896 #ifdef CONFIG_RISKY
3897                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3898 #endif
3899                     break;
3900                 case MB_TYPE_BIDIR:
3901                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3902                     s->mb_intra= 0;
3903                     motion_x=0;
3904                     motion_y=0;
3905                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3906                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3907                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3908                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3909                     break;
3910                 case MB_TYPE_BACKWARD:
3911                     s->mv_dir = MV_DIR_BACKWARD;
3912                     s->mb_intra= 0;
3913                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3914                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3915                     break;
3916                 case MB_TYPE_FORWARD:
3917                     s->mv_dir = MV_DIR_FORWARD;
3918                     s->mb_intra= 0;
3919                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3920                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3921 //                    printf(" %d %d ", motion_x, motion_y);
3922                     break;
3923                 default:
3924                     motion_x=motion_y=0; //gcc warning fix
3925                     printf("illegal MB type\n");
3926                 }
3927
3928                 encode_mb(s, motion_x, motion_y);
3929
3930                 // RAL: Update last macroblock type
3931                 s->last_mv_dir = s->mv_dir;
3932             
3933 #ifdef CONFIG_RISKY
3934                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
3935                     ff_h263_update_motion_val(s);
3936 #endif
3937                 
3938                 MPV_decode_mb(s, s->block);
3939             }
3940
3941             /* clean the MV table in I/P/S frames for direct mode in B frames */
3942             if(s->mb_intra /* && I,P,S_TYPE */){
3943                 s->p_mv_table[xy][0]=0;
3944                 s->p_mv_table[xy][1]=0;
3945             }
3946             
3947             if(s->flags&CODEC_FLAG_PSNR){
3948                 int w= 16;
3949                 int h= 16;
3950
3951                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3952                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3953
3954                 s->current_picture_ptr->error[0] += sse(
3955                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3956                     s->dest[0], w, h, s->linesize);
3957                 s->current_picture_ptr->error[1] += sse(
3958                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3959                     s->dest[1], w>>1, h>>1, s->uvlinesize);
3960                 s->current_picture_ptr->error[2] += sse(
3961                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3962                     s->dest[2], w>>1, h>>1, s->uvlinesize);
3963             }
3964 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, get_bit_count(&s->pb));
3965         }
3966     }
3967     emms_c();
3968
3969 #ifdef CONFIG_RISKY
3970     if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
3971         ff_mpeg4_merge_partitions(s);
3972
3973     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
3974         msmpeg4_encode_ext_header(s);
3975
3976     if(s->codec_id==CODEC_ID_MPEG4) 
3977         ff_mpeg4_stuffing(&s->pb);
3978 #endif
3979
3980     //if (s->gob_number)
3981     //    fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
3982     
3983     /* Send the last GOB if RTP */    
3984     if (s->rtp_mode) {
3985         flush_put_bits(&s->pb);
3986         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
3987         /* Call the RTP callback to send the last GOB */
3988         if (s->rtp_callback)
3989             s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number);
3990         s->ptr_lastgob = pbBufPtr(&s->pb);
3991         //fprintf(stderr,"\nGOB: %2d size: %d (last)", s->gob_number, pdif);
3992     }
3993 }
3994
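     /* dct_quantize_trellis_c(): rate-distortion optimal ("trellis") quantization.
        After the forward DCT each coefficient (in scan order) gets up to two
        candidate quantized levels: the normally rounded value and the next smaller
        magnitude.  A dynamic program over the scan positions then keeps, for every
        position, the cheapest distortion + lambda*bits path ending in a coded
        coefficient there, using the run/level VLC length tables for the bit cost
        and the dequantized-vs-original difference for the distortion; a separate
        score tracks the best choice of last coded coefficient.  lambda is
        proportional to qscale^2. */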
3995 static int dct_quantize_trellis_c(MpegEncContext *s, 
3996                         DCTELEM *block, int n,
3997                         int qscale, int *overflow){
3998     const int *qmat;
3999     const uint8_t *scantable= s->intra_scantable.scantable;
4000     int max=0;
4001     unsigned int threshold1, threshold2;
4002     int bias=0;
4003     int run_tab[65];
4004     int level_tab[65];
4005     int score_tab[65];
4006     int last_run=0;
4007     int last_level=0;
4008     int last_score= 0;
4009     int last_i= 0;
4010     int not_coded_score= 0;
4011     int coeff[3][64];
4012     int coeff_count[64];
4013     int lambda, qmul, qadd, start_i, last_non_zero, i, dc;
4014     const int esc_length= s->ac_esc_length;
4015     uint8_t * length;
4016     uint8_t * last_length;
4017     int score_limit=0;
4018     int left_limit= 0;
4019         
4020     s->dsp.fdct (block);
4021
4022     qmul= qscale*16;
4023     qadd= ((qscale-1)|1)*8;
4024
4025     if (s->mb_intra) {
4026         int q;
4027         if (!s->h263_aic) {
4028             if (n < 4)
4029                 q = s->y_dc_scale;
4030             else
4031                 q = s->c_dc_scale;
4032             q = q << 3;
4033         } else{
4034             /* For AIC we skip quant/dequant of INTRADC */
4035             q = 1 << 3;
4036             qadd=0;
4037         }
4038             
4039         /* note: block[0] is assumed to be positive */
4040         block[0] = (block[0] + (q >> 1)) / q;
4041         start_i = 1;
4042         last_non_zero = 0;
4043         qmat = s->q_intra_matrix[qscale];
4044         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4045             bias= 1<<(QMAT_SHIFT-1);
4046         length     = s->intra_ac_vlc_length;
4047         last_length= s->intra_ac_vlc_last_length;
4048     } else {
4049         start_i = 0;
4050         last_non_zero = -1;
4051         qmat = s->q_inter_matrix[qscale];
4052         length     = s->inter_ac_vlc_length;
4053         last_length= s->inter_ac_vlc_last_length;
4054     }
4055
4056     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4057     threshold2= (threshold1<<1);
4058
4059     for(i=start_i; i<64; i++) {
4060         const int j = scantable[i];
4061         const int k= i-start_i;
4062         int level = block[j];
4063         level = level * qmat[j];
4064
4065 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
4066 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
4067         if(((unsigned)(level+threshold1))>threshold2){
4068             if(level>0){
4069                 level= (bias + level)>>QMAT_SHIFT;
4070                 coeff[0][k]= level;
4071                 coeff[1][k]= level-1;
4072 //                coeff[2][k]= level-2;
4073             }else{
4074                 level= (bias - level)>>QMAT_SHIFT;
4075                 coeff[0][k]= -level;
4076                 coeff[1][k]= -level+1;
4077 //                coeff[2][k]= -level+2;
4078             }
4079             coeff_count[k]= FFMIN(level, 2);
4080             assert(coeff_count[k]);
4081             max |=level;
4082             last_non_zero = i;
4083         }else{
4084             coeff[0][k]= (level>>31)|1;
4085             coeff_count[k]= 1;
4086         }
4087     }
4088     
4089     *overflow= s->max_qcoeff < max; //overflow might have happened
4090     
4091     if(last_non_zero < start_i){
4092         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4093         return last_non_zero;
4094     }
4095
4096     lambda= (qscale*qscale*64*105 + 64)>>7; //FIXME finetune
4097         
4098     score_tab[0]= 0;
4099     for(i=0; i<=last_non_zero - start_i; i++){
4100         int level_index, run, j;
4101         const int dct_coeff= block[ scantable[i + start_i] ];
4102         const int zero_distoration= dct_coeff*dct_coeff;
4103         int best_score=256*256*256*120;
4104
4105         last_score += zero_distoration;
4106         not_coded_score += zero_distoration;
4107         for(level_index=0; level_index < coeff_count[i]; level_index++){
4108             int distoration;
4109             int level= coeff[level_index][i];
4110             int unquant_coeff;
4111             
4112             assert(level);
4113
4114             if(s->out_format == FMT_H263){
4115                 if(level>0){
4116                     unquant_coeff= level*qmul + qadd;
4117                 }else{
4118                     unquant_coeff= level*qmul - qadd;
4119                 }
4120             }else{ //MPEG1
4121                 j= s->dsp.idct_permutation[ scantable[i + start_i] ]; //FIXME optimize
4122                 if(s->mb_intra){
4123                     if (level < 0) {
4124                         unquant_coeff = (int)((-level) * qscale * s->intra_matrix[j]) >> 3;
4125                         unquant_coeff = -((unquant_coeff - 1) | 1);
4126                     } else {
4127                         unquant_coeff = (int)(  level  * qscale * s->intra_matrix[j]) >> 3;
4128                         unquant_coeff =   (unquant_coeff - 1) | 1;
4129                     }
4130                 }else{
4131                     if (level < 0) {
4132                         unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
4133                         unquant_coeff = -((unquant_coeff - 1) | 1);
4134                     } else {
4135                         unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
4136                         unquant_coeff =   (unquant_coeff - 1) | 1;
4137                     }
4138                 }
4139                 unquant_coeff<<= 3;
4140             }
4141
4142             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff);
4143             level+=64;
4144             if((level&(~127)) == 0){
4145                 for(run=0; run<=i - left_limit; run++){
4146                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4147                     score += score_tab[i-run];
4148                     
4149                     if(score < best_score){
4150                         best_score= 
4151                         score_tab[i+1]= score;
4152                         run_tab[i+1]= run;
4153                         level_tab[i+1]= level-64;
4154                     }
4155                 }
4156
4157                 if(s->out_format == FMT_H263){
4158                     for(run=0; run<=i - left_limit; run++){
4159                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4160                         score += score_tab[i-run];
4161                         if(score < last_score){
4162                             last_score= score;
4163                             last_run= run;
4164                             last_level= level-64;
4165                             last_i= i+1;
4166                         }
4167                     }
4168                 }
4169             }else{
4170                 distoration += esc_length*lambda;
4171                 for(run=0; run<=i - left_limit; run++){
4172                     int score= distoration + score_tab[i-run];
4173                     
4174                     if(score < best_score){
4175                         best_score= 
4176                         score_tab[i+1]= score;
4177                         run_tab[i+1]= run;
4178                         level_tab[i+1]= level-64;
4179                     }
4180                 }
4181
4182                 if(s->out_format == FMT_H263){
4183                     for(run=0; run<=i - left_limit; run++){
4184                         int score= distoration + score_tab[i-run];
4185                         if(score < last_score){
4186                             last_score= score;
4187                             last_run= run;
4188                             last_level= level-64;
4189                             last_i= i+1;
4190                         }
4191                     }
4192                 }
4193             }
4194         }
4195
4196         for(j=left_limit; j<=i; j++){
4197             score_tab[j] += zero_distoration;
4198         }
4199         score_limit+= zero_distoration;
4200         if(score_tab[i+1] < score_limit)
4201             score_limit= score_tab[i+1];
4202         
4203         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
4204         while(score_tab[ left_limit ] > score_limit + lambda) left_limit++;
4205     }
4206
4207         //FIXME add some cbp penalty
4208
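    /* for MPEG-1/2 pick the best truncation point by scanning all candidate
       last-coefficient positions; the lambda*2 term is a rough estimate of the
       extra bits needed to terminate the block there */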
4209     if(s->out_format != FMT_H263){
4210         last_score= 256*256*256*120;
4211         for(i= left_limit; i<=last_non_zero - start_i + 1; i++){
4212             int score= score_tab[i];
4213             if(i) score += lambda*2; //FIXME could be more exact
4214
4215             if(score < last_score){
4216                 last_score= score;
4217                 last_i= i;
4218                 last_level= level_tab[i];
4219                 last_run= run_tab[i];
4220             }
4221         }
4222     }
4223
4224     s->coded_score[n] = last_score - not_coded_score;
4225     
4226     dc= block[0];
4227     last_non_zero= last_i - 1 + start_i;
4228     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4229     
4230     if(last_non_zero < start_i)
4231         return last_non_zero;
4232
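    /* special case: only the first scan position survives; re-score every
       candidate level for it against simply not coding the block at all
       (best_level == 0 means dropping the block is cheapest) */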
4233     if(last_non_zero == 0 && start_i == 0){
4234         int best_level= 0;
4235         int best_score= dc * dc;
4236         
4237         for(i=0; i<coeff_count[0]; i++){
4238             int level= coeff[i][0];
4239             int unquant_coeff, score, distoration;
4240
4241             if(s->out_format == FMT_H263){
4242                 if(level>0){
4243                     unquant_coeff= (level*qmul + qadd)>>3;
4244                 }else{
4245                     unquant_coeff= (level*qmul - qadd)>>3;
4246                 }
4247             }else{ //MPEG1
4248                     if (level < 0) {
4249                         unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
4250                         unquant_coeff = -((unquant_coeff - 1) | 1);
4251                     } else {
4252                         unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
4253                         unquant_coeff =   (unquant_coeff - 1) | 1;
4254                     }
4255             }
4256             unquant_coeff = (unquant_coeff + 4) >> 3;
4257             unquant_coeff<<= 3 + 3;
4258
4259             distoration= (unquant_coeff - dc) * (unquant_coeff - dc);
4260             level+=64;
4261             if((level&(~127)) == 0)
4262                 score= distoration + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4263             else
4264                 score= distoration + esc_length*lambda;
4265
4266             if(score < best_score){
4267                 best_score= score;
4268                 best_level= level - 64;
4269             }
4270         }
4271         block[0]= best_level;
4272         s->coded_score[n] = best_score - dc*dc;
4273         if(best_level == 0) return -1;
4274         else                return last_non_zero;
4275     }
4276
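    /* back-track through run_tab/level_tab from the chosen last coefficient,
       writing the surviving levels into the block in IDCT-permuted order */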
4277     i= last_i;
4278     assert(last_level);
4279 //FIXME use permutated scantable
4280     block[ s->dsp.idct_permutation[ scantable[last_non_zero] ] ]= last_level;
4281     i -= last_run + 1;
4282     
4283     for(;i>0 ; i -= run_tab[i] + 1){
4284         const int j= s->dsp.idct_permutation[ scantable[i - 1 + start_i] ];
4285     
4286         block[j]= level_tab[i];
4287         assert(block[j]);
4288     }
4289
4290     return last_non_zero;
4291 }
4292
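/* forward DCT followed by scalar quantization with a dead-zone bias;
   returns the index of the last nonzero coefficient in scan order and
   reports via *overflow whether any level exceeded the codec's maximum */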
4293 static int dct_quantize_c(MpegEncContext *s, 
4294                         DCTELEM *block, int n,
4295                         int qscale, int *overflow)
4296 {
4297     int i, j, level, last_non_zero, q;
4298     const int *qmat;
4299     const uint8_t *scantable= s->intra_scantable.scantable;
4300     int bias;
4301     int max=0;
4302     unsigned int threshold1, threshold2;
4303
4304     s->dsp.fdct (block);
4305
4306     if (s->mb_intra) {
4307         if (!s->h263_aic) {
4308             if (n < 4)
4309                 q = s->y_dc_scale;
4310             else
4311                 q = s->c_dc_scale;
4312             q = q << 3;
4313         } else
4314             /* For AIC we skip quant/dequant of INTRADC */
4315             q = 1 << 3;
4316             
4317         /* note: block[0] is assumed to be positive */
4318         block[0] = (block[0] + (q >> 1)) / q;
4319         i = 1;
4320         last_non_zero = 0;
4321         qmat = s->q_intra_matrix[qscale];
4322         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4323     } else {
4324         i = 0;
4325         last_non_zero = -1;
4326         qmat = s->q_inter_matrix[qscale];
4327         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4328     }
4329     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4330     threshold2= (threshold1<<1);
4331
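    /* the unsigned comparison below is equivalent to the commented-out pair of
       signed tests: coefficients whose scaled magnitude stays below the
       quantization threshold take the else branch and are zeroed without a
       per-sign branch */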
4332     for(;i<64;i++) {
4333         j = scantable[i];
4334         level = block[j];
4335         level = level * qmat[j];
4336
4337 //        if(   bias+level >= (1<<QMAT_SHIFT)
4338 //           || bias-level >= (1<<QMAT_SHIFT)){
4339         if(((unsigned)(level+threshold1))>threshold2){
4340             if(level>0){
4341                 level= (bias + level)>>QMAT_SHIFT;
4342                 block[j]= level;
4343             }else{
4344                 level= (bias - level)>>QMAT_SHIFT;
4345                 block[j]= -level;
4346             }
4347             max |=level;
4348             last_non_zero = i;
4349         }else{
4350             block[j]=0;
4351         }
4352     }
4353     *overflow= s->max_qcoeff < max; //overflow might have happened
4354     
4355     /* we need this permutation so that the IDCT is correct; we only permute the nonzero elements */
4356     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4357         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4358
4359     return last_non_zero;
4360 }
4361
4362 #endif //CONFIG_ENCODERS
4363
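/* MPEG-1 inverse quantization; the (level - 1) | 1 step forces reconstructed
   levels to be odd, which is MPEG-1's way of limiting IDCT mismatch */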
4364 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
4365                                    DCTELEM *block, int n, int qscale)
4366 {
4367     int i, level, nCoeffs;
4368     const uint16_t *quant_matrix;
4369
4370     nCoeffs= s->block_last_index[n];
4371     
4372     if (s->mb_intra) {
4373         if (n < 4) 
4374             block[0] = block[0] * s->y_dc_scale;
4375         else
4376             block[0] = block[0] * s->c_dc_scale;
4377         /* XXX: only mpeg1 */
4378         quant_matrix = s->intra_matrix;
4379         for(i=1;i<=nCoeffs;i++) {
4380             int j= s->intra_scantable.permutated[i];
4381             level = block[j];
4382             if (level) {
4383                 if (level < 0) {
4384                     level = -level;
4385                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4386                     level = (level - 1) | 1;
4387                     level = -level;
4388                 } else {
4389                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4390                     level = (level - 1) | 1;
4391                 }
4392 #ifdef PARANOID
4393                 if (level < -2048 || level > 2047)
4394                     fprintf(stderr, "unquant error %d %d\n", i, level);
4395 #endif
4396                 block[j] = level;
4397             }
4398         }
4399     } else {
4400         i = 0;
4401         quant_matrix = s->inter_matrix;
4402         for(;i<=nCoeffs;i++) {
4403             int j= s->intra_scantable.permutated[i];
4404             level = block[j];
4405             if (level) {
4406                 if (level < 0) {
4407                     level = -level;
4408                     level = (((level << 1) + 1) * qscale *
4409                              ((int) (quant_matrix[j]))) >> 4;
4410                     level = (level - 1) | 1;
4411                     level = -level;
4412                 } else {
4413                     level = (((level << 1) + 1) * qscale *
4414                              ((int) (quant_matrix[j]))) >> 4;
4415                     level = (level - 1) | 1;
4416                 }
4417 #ifdef PARANOID
4418                 if (level < -2048 || level > 2047)
4419                     fprintf(stderr, "unquant error %d %d\n", i, level);
4420 #endif
4421                 block[j] = level;
4422             }
4423         }
4424     }
4425 }
4426
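/* MPEG-2 inverse quantization; instead of oddification, inter blocks use
   mismatch control: the LSB of block[63] is toggled according to the parity
   of the coefficient sum */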
4427 static void dct_unquantize_mpeg2_c(MpegEncContext *s, 
4428                                    DCTELEM *block, int n, int qscale)
4429 {
4430     int i, level, nCoeffs;
4431     const uint16_t *quant_matrix;
4432
4433     if(s->alternate_scan) nCoeffs= 63;
4434     else nCoeffs= s->block_last_index[n];
4435     
4436     if (s->mb_intra) {
4437         if (n < 4) 
4438             block[0] = block[0] * s->y_dc_scale;
4439         else
4440             block[0] = block[0] * s->c_dc_scale;
4441         quant_matrix = s->intra_matrix;
4442         for(i=1;i<=nCoeffs;i++) {
4443             int j= s->intra_scantable.permutated[i];
4444             level = block[j];
4445             if (level) {
4446                 if (level < 0) {
4447                     level = -level;
4448                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4449                     level = -level;
4450                 } else {
4451                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4452                 }
4453 #ifdef PARANOID
4454                 if (level < -2048 || level > 2047)
4455                     fprintf(stderr, "unquant error %d %d\n", i, level);
4456 #endif
4457                 block[j] = level;
4458             }
4459         }
4460     } else {
4461         int sum=-1;
4462         i = 0;
4463         quant_matrix = s->inter_matrix;
4464         for(;i<=nCoeffs;i++) {
4465             int j= s->intra_scantable.permutated[i];
4466             level = block[j];
4467             if (level) {
4468                 if (level < 0) {
4469                     level = -level;
4470                     level = (((level << 1) + 1) * qscale *
4471                              ((int) (quant_matrix[j]))) >> 4;
4472                     level = -level;
4473                 } else {
4474                     level = (((level << 1) + 1) * qscale *
4475                              ((int) (quant_matrix[j]))) >> 4;
4476                 }
4477 #ifdef PARANOID
4478                 if (level < -2048 || level > 2047)
4479                     fprintf(stderr, "unquant error %d %d\n", i, level);
4480 #endif
4481                 block[j] = level;
4482                 sum+=level;
4483             }
4484         }
4485         block[63]^=sum&1;
4486     }
4487 }
4488
4489
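/* H.263/MPEG-4 style inverse quantization: level' = 2*qscale*level +/- qadd
   with qadd = (qscale - 1) | 1; with AIC, intra blocks skip the DC scaling
   and use qadd = 0 */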
4490 static void dct_unquantize_h263_c(MpegEncContext *s, 
4491                                   DCTELEM *block, int n, int qscale)
4492 {
4493     int i, level, qmul, qadd;
4494     int nCoeffs;
4495     
4496     assert(s->block_last_index[n]>=0);
4497     
4498     qadd = (qscale - 1) | 1;
4499     qmul = qscale << 1;
4500     
4501     if (s->mb_intra) {
4502         if (!s->h263_aic) {
4503             if (n < 4) 
4504                 block[0] = block[0] * s->y_dc_scale;
4505             else
4506                 block[0] = block[0] * s->c_dc_scale;
4507         }else
4508             qadd = 0;
4509         i = 1;
4510         nCoeffs= 63; //does not always use the zigzag table
4511     } else {
4512         i = 0;
4513         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
4514     }
4515
4516     for(;i<=nCoeffs;i++) {
4517         level = block[i];
4518         if (level) {
4519             if (level < 0) {
4520                 level = level * qmul - qadd;
4521             } else {
4522                 level = level * qmul + qadd;
4523             }
4524 #ifdef PARANOID
4525             if (level < -2048 || level > 2047)
4526                 fprintf(stderr, "unquant error %d %d\n", i, level);
4527 #endif
4528             block[i] = level;
4529         }
4530     }
4531 }
4532
4533
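/* encoder parameters exposed through the AVOption API; the table is shared by
   the MPEG-4 family of encoders declared below */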
4534 static const AVOption mpeg4_options[] =
4535 {
4536     AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
4537     AVOPTION_CODEC_INT("ratetol", "number of bits the bitstream is allowed to diverge from the reference; "
4538                        "the reference can be CBR (for CBR pass1) or VBR (for pass2)",
4539                        bit_rate_tolerance, 4, 240000000, 8000),
4540     AVOPTION_CODEC_INT("qmin", "minimum quantizer", qmin, 1, 31, 2),
4541     AVOPTION_CODEC_INT("qmax", "maximum quantizer", qmax, 1, 31, 31),
4542     AVOPTION_CODEC_STRING("rc_eq", "rate control equation",
4543                           rc_eq, "tex^qComp,option1,options2", 0),
4544     AVOPTION_CODEC_INT("rc_minrate", "rate control minimum bitrate",
4545                        rc_min_rate, 4, 24000000, 0),
4546     AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
4547                        rc_max_rate, 4, 24000000, 0),
4548     AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggressivity",
4549                           rc_buffer_aggressivity, 4, 24000000, 0),
4550     AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
4551                           rc_initial_cplx, 0., 9999999., 0),
4552     AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
4553                           i_quant_factor, 0., 0., 0),
4554     AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
4555                           i_quant_offset, -999999., 999999., 0),
4556     AVOPTION_CODEC_INT("dct_algo", "dct algorithm",
4557                        dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
4558     AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
4559                           lumi_masking, 0., 999999., 0),
4560     AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporal complexity masking",
4561                           temporal_cplx_masking, 0., 999999., 0),
4562     AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
4563                           spatial_cplx_masking, 0., 999999., 0),
4564     AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
4565                           p_masking, 0., 999999., 0),
4566     AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
4567                           dark_masking, 0., 999999., 0),
4568     AVOPTION_CODEC_INT("idct_algo", "idct algorithm",
4569                        idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
4570
4571     AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
4572                        mb_qmin, 0, 8, 0),
4573     AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
4574                        mb_qmax, 0, 8, 0),
4575
4576     AVOPTION_CODEC_INT("me_cmp", "ME compare function",
4577                        me_cmp, 0, 24000000, 0),
4578     AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
4579                        me_sub_cmp, 0, 24000000, 0),
4580
4581
4582     AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
4583                        dia_size, 0, 24000000, 0),
4584     AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors",
4585                        last_predictor_count, 0, 24000000, 0),
4586
4587     AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
4588                        pre_me, 0, 24000000, 0),
4589     AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
4590                        me_pre_cmp, 0, 24000000, 0),
4591
4592     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
4593                        me_range, 0, 24000000, 0),
4594     AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamond size & shape",
4595                        pre_dia_size, 0, 24000000, 0),
4596     AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
4597                        me_subpel_quality, 0, 24000000, 0),
4600     AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
4601                         flags, CODEC_FLAG_PSNR, 0),
4602     AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
4603                               rc_override),
4604     AVOPTION_SUB(avoptions_common),
4605     AVOPTION_END()
4606 };
4607
4608 #ifdef CONFIG_ENCODERS
4609
4610 AVCodec mpeg1video_encoder = {
4611     "mpeg1video",
4612     CODEC_TYPE_VIDEO,
4613     CODEC_ID_MPEG1VIDEO,
4614     sizeof(MpegEncContext),
4615     MPV_encode_init,
4616     MPV_encode_picture,
4617     MPV_encode_end,
4618 };
4619
4620 #ifdef CONFIG_RISKY
4621
4622 AVCodec mpeg2video_encoder = {
4623     "mpeg2video",
4624     CODEC_TYPE_VIDEO,
4625     CODEC_ID_MPEG2VIDEO,
4626     sizeof(MpegEncContext),
4627     MPV_encode_init,
4628     MPV_encode_picture,
4629     MPV_encode_end,
4630 };
4631
4632 AVCodec h263_encoder = {
4633     "h263",
4634     CODEC_TYPE_VIDEO,
4635     CODEC_ID_H263,
4636     sizeof(MpegEncContext),
4637     MPV_encode_init,
4638     MPV_encode_picture,
4639     MPV_encode_end,
4640 };
4641
4642 AVCodec h263p_encoder = {
4643     "h263p",
4644     CODEC_TYPE_VIDEO,
4645     CODEC_ID_H263P,
4646     sizeof(MpegEncContext),
4647     MPV_encode_init,
4648     MPV_encode_picture,
4649     MPV_encode_end,
4650 };
4651
4652 AVCodec flv_encoder = {
4653     "flv",
4654     CODEC_TYPE_VIDEO,
4655     CODEC_ID_FLV1,
4656     sizeof(MpegEncContext),
4657     MPV_encode_init,
4658     MPV_encode_picture,
4659     MPV_encode_end,
4660 };
4661
4662 AVCodec rv10_encoder = {
4663     "rv10",
4664     CODEC_TYPE_VIDEO,
4665     CODEC_ID_RV10,
4666     sizeof(MpegEncContext),
4667     MPV_encode_init,
4668     MPV_encode_picture,
4669     MPV_encode_end,
4670 };
4671
4672 AVCodec mpeg4_encoder = {
4673     "mpeg4",
4674     CODEC_TYPE_VIDEO,
4675     CODEC_ID_MPEG4,
4676     sizeof(MpegEncContext),
4677     MPV_encode_init,
4678     MPV_encode_picture,
4679     MPV_encode_end,
4680     .options = mpeg4_options,
4681 };
4682
4683 AVCodec msmpeg4v1_encoder = {
4684     "msmpeg4v1",
4685     CODEC_TYPE_VIDEO,
4686     CODEC_ID_MSMPEG4V1,
4687     sizeof(MpegEncContext),
4688     MPV_encode_init,
4689     MPV_encode_picture,
4690     MPV_encode_end,
4691     .options = mpeg4_options,
4692 };
4693
4694 AVCodec msmpeg4v2_encoder = {
4695     "msmpeg4v2",
4696     CODEC_TYPE_VIDEO,
4697     CODEC_ID_MSMPEG4V2,
4698     sizeof(MpegEncContext),
4699     MPV_encode_init,
4700     MPV_encode_picture,
4701     MPV_encode_end,
4702     .options = mpeg4_options,
4703 };
4704
4705 AVCodec msmpeg4v3_encoder = {
4706     "msmpeg4",
4707     CODEC_TYPE_VIDEO,
4708     CODEC_ID_MSMPEG4V3,
4709     sizeof(MpegEncContext),
4710     MPV_encode_init,
4711     MPV_encode_picture,
4712     MPV_encode_end,
4713     .options = mpeg4_options,
4714 };
4715
4716 AVCodec wmv1_encoder = {
4717     "wmv1",
4718     CODEC_TYPE_VIDEO,
4719     CODEC_ID_WMV1,
4720     sizeof(MpegEncContext),
4721     MPV_encode_init,
4722     MPV_encode_picture,
4723     MPV_encode_end,
4724     .options = mpeg4_options,
4725 };
4726
4727 #endif
4728
4729 AVCodec mjpeg_encoder = {
4730     "mjpeg",
4731     CODEC_TYPE_VIDEO,
4732     CODEC_ID_MJPEG,
4733     sizeof(MpegEncContext),
4734     MPV_encode_init,
4735     MPV_encode_picture,
4736     MPV_encode_end,
4737 };
4738
4739 #endif //CONFIG_ENCODERS
4740