1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
20  */
21  
22 /**
23  * @file mpegvideo.c
24  * The simplest mpeg encoder (well, it was the simplest!).
25  */ 
26  
27 #include <ctype.h>
28 #include <limits.h>
29 #include "avcodec.h"
30 #include "dsputil.h"
31 #include "mpegvideo.h"
32
33 #ifdef USE_FASTMEMCPY
34 #include "fastmemcpy.h"
35 #endif
36
37 //#undef NDEBUG
38 //#include <assert.h>
39
40 #ifdef CONFIG_ENCODERS
41 static void encode_picture(MpegEncContext *s, int picture_number);
42 #endif //CONFIG_ENCODERS
43 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
44                                    DCTELEM *block, int n, int qscale);
45 static void dct_unquantize_mpeg2_c(MpegEncContext *s,
46                                    DCTELEM *block, int n, int qscale);
47 static void dct_unquantize_h263_c(MpegEncContext *s, 
48                                   DCTELEM *block, int n, int qscale);
49 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
50 #ifdef CONFIG_ENCODERS
51 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
52 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
53 static int sse_mb(MpegEncContext *s);
54 #endif //CONFIG_ENCODERS
55
56 #ifdef HAVE_XVMC
57 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
58 extern void XVMC_field_end(MpegEncContext *s);
59 extern void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
60 #endif
61
62 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
63
64
65 /* enable all paranoid tests for rounding, overflows, etc... */
66 //#define PARANOID
67
68 //#define DEBUG
69
70
71 /* for jpeg fast DCT */
72 #define CONST_BITS 14
73
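/* These appear to be the AAN (Arai/Agui/Nakajima) fast-DCT post-scale factors:
 * aanscales[i*8+j] ~= 2^14 * a[i] * a[j] with a[0]=1 and a[k]=sqrt(2)*cos(k*PI/16);
 * convert_matrix() folds them into the quantizer when fdct_ifast is used. */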
74 static const uint16_t aanscales[64] = {
75     /* precomputed values scaled up by 14 bits */
76     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
77     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
78     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
79     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
80     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
81     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
82     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
83     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
84 };
85
86 static const uint8_t h263_chroma_roundtab[16] = {
87 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
88     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
89 };
90
91 #ifdef CONFIG_ENCODERS
92 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
93 static uint8_t default_fcode_tab[MAX_MV*2+1];
94
95 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
96
97 static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
98                            const uint16_t *quant_matrix, int bias, int qmin, int qmax)
99 {
100     int qscale;
101
102     for(qscale=qmin; qscale<=qmax; qscale++){
103         int i;
104         if (s->dsp.fdct == ff_jpeg_fdct_islow) {
105             for(i=0;i<64;i++) {
106                 const int j= s->dsp.idct_permutation[i];
107                 /* 16 <= qscale * quant_matrix[i] <= 7905, so */
108                 /* (1<<QMAT_SHIFT)/16 >= qmat[qscale][i] >= (1<<QMAT_SHIFT)/7905 */
109                 /* (the aanscales factors are only folded in for the */
110                 /* fdct_ifast case below) */
111                 
112                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
113                                 (qscale * quant_matrix[j]));
114             }
115         } else if (s->dsp.fdct == fdct_ifast) {
116             for(i=0;i<64;i++) {
117                 const int j= s->dsp.idct_permutation[i];
118                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
119                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
120                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
121                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
122                 
123                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
124                                 (aanscales[i] * qscale * quant_matrix[j]));
125             }
126         } else {
127             for(i=0;i<64;i++) {
128                 const int j= s->dsp.idct_permutation[i];
129                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
130                    So 16           <= qscale * quant_matrix[i]             <= 7905
131                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
132                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
133                 */
134                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
135 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
136                 qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
137
138                 if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
139                 qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
140             }
141         }
142     }
143 }
144 #endif //CONFIG_ENCODERS
145
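/* Initializes a ScanTable: keeps a pointer to the source scan order, precomputes
 * the IDCT-permuted scan in st->permutated[] and, for each scan position, the
 * highest permuted index seen so far in st->raster_end[]. */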
146 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
147     int i;
148     int end;
149     
150     st->scantable= src_scantable;
151
152     for(i=0; i<64; i++){
153         int j;
154         j = src_scantable[i];
155         st->permutated[i] = permutation[j];
156 #ifdef ARCH_POWERPC
157         st->inverse[j] = i;
158 #endif
159     }
160     
161     end=-1;
162     for(i=0; i<64; i++){
163         int j;
164         j = st->permutated[i];
165         if(j>end) end=j;
166         st->raster_end[i]= end;
167     }
168 }
169
170 /* init common dct for both encoder and decoder */
171 int DCT_common_init(MpegEncContext *s)
172 {
173     s->dct_unquantize_h263 = dct_unquantize_h263_c;
174     s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
175     s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
176
177 #ifdef CONFIG_ENCODERS
178     s->dct_quantize= dct_quantize_c;
179 #endif
180         
181 #ifdef HAVE_MMX
182     MPV_common_init_mmx(s);
183 #endif
184 #ifdef ARCH_ALPHA
185     MPV_common_init_axp(s);
186 #endif
187 #ifdef HAVE_MLIB
188     MPV_common_init_mlib(s);
189 #endif
190 #ifdef HAVE_MMI
191     MPV_common_init_mmi(s);
192 #endif
193 #ifdef ARCH_ARMV4L
194     MPV_common_init_armv4l(s);
195 #endif
196 #ifdef ARCH_POWERPC
197     MPV_common_init_ppc(s);
198 #endif
199
200 #ifdef CONFIG_ENCODERS
201     s->fast_dct_quantize= s->dct_quantize;
202
203     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
204         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
205     }
206
207 #endif //CONFIG_ENCODERS
208
209     /* load & permutate scantables
210        note: only wmv uses different ones 
211     */
212     ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
213     ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
214     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
215     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
216
217     s->picture_structure= PICT_FRAME;
218     
219     return 0;
220 }
221
222 /**
223  * allocates a Picture
224  * The pixels are allocated/set by calling get_buffer() if shared=0
225  */
226 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
227     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't sig11
228     const int mb_array_size= s->mb_stride*s->mb_height;
229     int i;
230     
231     if(shared){
232         assert(pic->data[0]);
233         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
234         pic->type= FF_BUFFER_TYPE_SHARED;
235     }else{
236         int r;
237         
238         assert(!pic->data[0]);
239         
240         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
241         
242         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
243             fprintf(stderr, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
244             return -1;
245         }
246
247         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
248             fprintf(stderr, "get_buffer() failed (stride changed)\n");
249             return -1;
250         }
251
252         if(pic->linesize[1] != pic->linesize[2]){
253             fprintf(stderr, "get_buffer() failed (uv stride mismatch)\n");
254             return -1;
255         }
256
257         s->linesize  = pic->linesize[0];
258         s->uvlinesize= pic->linesize[1];
259     }
260     
261     if(pic->qscale_table==NULL){
262         if (s->encoding) {        
263             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
264             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
265             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
266             CHECKED_ALLOCZ(pic->mb_cmp_score, mb_array_size * sizeof(int32_t))
267         }
268
269         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
270         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
271         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(int))
272         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
273         if(s->out_format == FMT_H264){
274             for(i=0; i<2; i++){
275                 CHECKED_ALLOCZ(pic->motion_val[i], 2 * 16 * s->mb_num * sizeof(uint16_t))
276                 CHECKED_ALLOCZ(pic->ref_index[i] , 4 * s->mb_num * sizeof(uint8_t))
277             }
278         }
279         pic->qstride= s->mb_stride;
280     }
281
282     //it might be nicer if the application would keep track of these but it would require an API change
283     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
284     s->prev_pict_types[0]= s->pict_type;
285     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
286         pic->age= INT_MAX; // skipped MBs in B-frames are quite rare in mpeg1/2 and it's a bit tricky to skip them anyway
287     
288     return 0;
289 fail: //for the CHECKED_ALLOCZ macro
290     return -1;
291 }
292
293 /**
294  * deallocates a picture
295  */
296 static void free_picture(MpegEncContext *s, Picture *pic){
297     int i;
298
299     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
300         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
301     }
302
303     av_freep(&pic->mb_var);
304     av_freep(&pic->mc_mb_var);
305     av_freep(&pic->mb_mean);
306     av_freep(&pic->mb_cmp_score);
307     av_freep(&pic->mbskip_table);
308     av_freep(&pic->qscale_table);
309     av_freep(&pic->mb_type_base);
310     pic->mb_type= NULL;
311     for(i=0; i<2; i++){
312         av_freep(&pic->motion_val[i]);
313         av_freep(&pic->ref_index[i]);
314     }
315     
316     if(pic->type == FF_BUFFER_TYPE_SHARED){
317         for(i=0; i<4; i++){
318             pic->base[i]=
319             pic->data[i]= NULL;
320         }
321         pic->type= 0;        
322     }
323 }
324
325 /* init common structure for both encoder and decoder */
326 int MPV_common_init(MpegEncContext *s)
327 {
328     int y_size, c_size, yc_size, i, mb_array_size, x, y;
329
330     dsputil_init(&s->dsp, s->avctx);
331     DCT_common_init(s);
332
333     s->flags= s->avctx->flags;
334
335     s->mb_width  = (s->width  + 15) / 16;
336     s->mb_height = (s->height + 15) / 16;
337     s->mb_stride = s->mb_width + 1;
338     mb_array_size= s->mb_height * s->mb_stride;
339
340     /* set default edge pos, will be overridden in decode_header if needed */
341     s->h_edge_pos= s->mb_width*16;
342     s->v_edge_pos= s->mb_height*16;
343
344     s->mb_num = s->mb_width * s->mb_height;
345     
346     s->block_wrap[0]=
347     s->block_wrap[1]=
348     s->block_wrap[2]=
349     s->block_wrap[3]= s->mb_width*2 + 2;
350     s->block_wrap[4]=
351     s->block_wrap[5]= s->mb_width + 2;
352
353     y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
354     c_size = (s->mb_width + 2) * (s->mb_height + 2);
355     yc_size = y_size + 2 * c_size;
356
357     /* convert fourcc to upper case */
358     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
359                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
360                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
361                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
362
363     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
364     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
365
366     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
367
368     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
369     for(y=0; y<s->mb_height; y++){
370         for(x=0; x<s->mb_width; x++){
371             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
372         }
373     }
374     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
375     
376     if (s->encoding) {
377         int mv_table_size= s->mb_stride * (s->mb_height+2) + 1;
378
379         /* Allocate MV tables */
380         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
381         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
382         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
383         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
384         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
385         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
386         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
387         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
388         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
389         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
390         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
391         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
392
393         //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
394         CHECKED_ALLOCZ(s->me.scratchpad,  s->width*2*16*3*sizeof(uint8_t)) 
395         
396         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
397         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
398
399         if(s->codec_id==CODEC_ID_MPEG4){
400             CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
401             CHECKED_ALLOCZ(   s->pb2_buffer, PB_BUFFER_SIZE);
402         }
403         
404         if(s->msmpeg4_version){
405             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
406         }
407         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
408
409         /* Allocate MB type table */
410         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint8_t)) //needed for encoding
411     }
412         
413     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
414     
415     if (s->out_format == FMT_H263 || s->encoding) {
416         int size;
417
418         /* MV prediction */
419         size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
420         CHECKED_ALLOCZ(s->motion_val, size * 2 * sizeof(int16_t));
421     }
422
423     if(s->codec_id==CODEC_ID_MPEG4){
424         /* interlaced direct mode decoding tables */
425         CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t))
426         CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t))
427     }
428     if (s->out_format == FMT_H263) {
429         /* ac values */
430         CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(int16_t) * 16);
431         s->ac_val[1] = s->ac_val[0] + y_size;
432         s->ac_val[2] = s->ac_val[1] + c_size;
433         
434         /* cbp values */
435         CHECKED_ALLOCZ(s->coded_block, y_size);
436         
437         /* divx501 bitstream reorder buffer */
438         CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
439
440         /* cbp, ac_pred, pred_dir */
441         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
442         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
443     }
444     
445     if (s->h263_pred || s->h263_plus || !s->encoding) {
446         /* dc values */
447         //MN: we need these for error resilience of intra-frames
448         CHECKED_ALLOCZ(s->dc_val[0], yc_size * sizeof(int16_t));
449         s->dc_val[1] = s->dc_val[0] + y_size;
450         s->dc_val[2] = s->dc_val[1] + c_size;
451         for(i=0;i<yc_size;i++)
452             s->dc_val[0][i] = 1024;
453     }
454
455     /* which mb is an intra block */
456     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
457     memset(s->mbintra_table, 1, mb_array_size);
458     
459     /* default structure is frame */
460     s->picture_structure = PICT_FRAME;
461     
462     /* init macroblock skip table */
463     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
464     //Note the +1 is for a quicker mpeg4 slice_end detection
465     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
466     
467     s->block= s->blocks[0];
468
469     s->parse_context.state= -1;
470
471     s->context_initialized = 1;
472     return 0;
473  fail:
474     MPV_common_end(s);
475     return -1;
476 }
477
478
479 //extern int sads;
480
481 /* free common structure for both encoder and decoder */
482 void MPV_common_end(MpegEncContext *s)
483 {
484     int i;
485
486     av_freep(&s->parse_context.buffer);
487     s->parse_context.buffer_size=0;
488
489     av_freep(&s->mb_type);
490     av_freep(&s->p_mv_table_base);
491     av_freep(&s->b_forw_mv_table_base);
492     av_freep(&s->b_back_mv_table_base);
493     av_freep(&s->b_bidir_forw_mv_table_base);
494     av_freep(&s->b_bidir_back_mv_table_base);
495     av_freep(&s->b_direct_mv_table_base);
496     s->p_mv_table= NULL;
497     s->b_forw_mv_table= NULL;
498     s->b_back_mv_table= NULL;
499     s->b_bidir_forw_mv_table= NULL;
500     s->b_bidir_back_mv_table= NULL;
501     s->b_direct_mv_table= NULL;
502     
503     av_freep(&s->motion_val);
504     av_freep(&s->dc_val[0]);
505     av_freep(&s->ac_val[0]);
506     av_freep(&s->coded_block);
507     av_freep(&s->mbintra_table);
508     av_freep(&s->cbp_table);
509     av_freep(&s->pred_dir_table);
510     av_freep(&s->me.scratchpad);
511     av_freep(&s->me.map);
512     av_freep(&s->me.score_map);
513     
514     av_freep(&s->mbskip_table);
515     av_freep(&s->prev_pict_types);
516     av_freep(&s->bitstream_buffer);
517     av_freep(&s->tex_pb_buffer);
518     av_freep(&s->pb2_buffer);
519     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
520     av_freep(&s->field_mv_table);
521     av_freep(&s->field_select_table);
522     av_freep(&s->avctx->stats_out);
523     av_freep(&s->ac_stats);
524     av_freep(&s->error_status_table);
525     av_freep(&s->mb_index2xy);
526
527     for(i=0; i<MAX_PICTURE_COUNT; i++){
528         free_picture(s, &s->picture[i]);
529     }
530     avcodec_default_free_buffers(s->avctx);
531     s->context_initialized = 0;
532 }
533
534 #ifdef CONFIG_ENCODERS
535
536 /* init video encoder */
537 int MPV_encode_init(AVCodecContext *avctx)
538 {
539     MpegEncContext *s = avctx->priv_data;
540     int i;
541     int chroma_h_shift, chroma_v_shift;
542
543     avctx->pix_fmt = PIX_FMT_YUV420P; // FIXME
544
545     s->bit_rate = avctx->bit_rate;
546     s->bit_rate_tolerance = avctx->bit_rate_tolerance;
547     s->width = avctx->width;
548     s->height = avctx->height;
549     if(avctx->gop_size > 600){
550         fprintf(stderr, "Warning: keyframe interval too large, reducing it ...\n");
551         avctx->gop_size=600;
552     }
553     s->gop_size = avctx->gop_size;
554     s->rtp_mode = avctx->rtp_mode;
555     s->rtp_payload_size = avctx->rtp_payload_size;
556     if (avctx->rtp_callback)
557         s->rtp_callback = avctx->rtp_callback;
558     s->max_qdiff= avctx->max_qdiff;
559     s->qcompress= avctx->qcompress;
560     s->qblur= avctx->qblur;
561     s->avctx = avctx;
562     s->flags= avctx->flags;
563     s->max_b_frames= avctx->max_b_frames;
564     s->b_frame_strategy= avctx->b_frame_strategy;
565     s->codec_id= avctx->codec->id;
566     s->luma_elim_threshold  = avctx->luma_elim_threshold;
567     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
568     s->strict_std_compliance= avctx->strict_std_compliance;
569     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
570     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
571     s->mpeg_quant= avctx->mpeg_quant;
572
573     if (s->gop_size <= 1) {
574         s->intra_only = 1;
575         s->gop_size = 12;
576     } else {
577         s->intra_only = 0;
578     }
579
580     s->me_method = avctx->me_method;
581
582     /* Fixed QSCALE */
583     s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
584     
585     s->adaptive_quant= (   s->avctx->lumi_masking
586                         || s->avctx->dark_masking
587                         || s->avctx->temporal_cplx_masking 
588                         || s->avctx->spatial_cplx_masking
589                         || s->avctx->p_masking)
590                        && !s->fixed_qscale;
591     
592     s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
593
594     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4){
595         fprintf(stderr, "4MV not supported by codec\n");
596         return -1;
597     }
598     
599     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
600         fprintf(stderr, "qpel not supported by codec\n");
601         return -1;
602     }
603
604     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
605         fprintf(stderr, "data partitioning not supported by codec\n");
606         return -1;
607     }
608     
609     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO){
610         fprintf(stderr, "b frames not supported by codec\n");
611         return -1;
612     }
613     
614     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
615         fprintf(stderr, "mpeg2 style quantization not supported by codec\n");
616         return -1;
617     }
618         
619     if(s->codec_id==CODEC_ID_MJPEG){
620         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
621         s->inter_quant_bias= 0;
622     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO){
623         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
624         s->inter_quant_bias= 0;
625     }else{
626         s->intra_quant_bias=0;
627         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
628     }
629     
630     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
631         s->intra_quant_bias= avctx->intra_quant_bias;
632     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
633         s->inter_quant_bias= avctx->inter_quant_bias;
634         
635     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
636
637     switch(avctx->codec->id) {
638     case CODEC_ID_MPEG1VIDEO:
639         s->out_format = FMT_MPEG1;
640         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
641         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
642         break;
643     case CODEC_ID_LJPEG:
644     case CODEC_ID_MJPEG:
645         s->out_format = FMT_MJPEG;
646         s->intra_only = 1; /* force intra only for jpeg */
647         s->mjpeg_write_tables = 1; /* write all tables */
648         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
649         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
650         s->mjpeg_vsample[1] = 1;
651         s->mjpeg_vsample[2] = 1; 
652         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
653         s->mjpeg_hsample[1] = 1; 
654         s->mjpeg_hsample[2] = 1; 
655         if (mjpeg_init(s) < 0)
656             return -1;
657         avctx->delay=0;
658         s->low_delay=1;
659         break;
660 #ifdef CONFIG_RISKY
661     case CODEC_ID_H263:
662         if (h263_get_picture_format(s->width, s->height) == 7) {
663             printf("Input picture size isn't suitable for h263 codec! try h263+\n");
664             return -1;
665         }
666         s->out_format = FMT_H263;
667         avctx->delay=0;
668         s->low_delay=1;
669         break;
670     case CODEC_ID_H263P:
671         s->out_format = FMT_H263;
672         s->h263_plus = 1;
673         /* Fx */
674         s->unrestricted_mv=(avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
675         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
676         /* /Fx */
677         /* These are just to be sure */
678         s->umvplus = 1;
679         avctx->delay=0;
680         s->low_delay=1;
681         break;
682     case CODEC_ID_FLV1:
683         s->out_format = FMT_H263;
684         s->h263_flv = 2; /* format = 1; 11-bit codes */
685         s->unrestricted_mv = 1;
686         s->rtp_mode=0; /* don't allow GOB */
687         avctx->delay=0;
688         s->low_delay=1;
689         break;
690     case CODEC_ID_RV10:
691         s->out_format = FMT_H263;
692         s->h263_rv10 = 1;
693         avctx->delay=0;
694         s->low_delay=1;
695         break;
696     case CODEC_ID_MPEG4:
697         s->out_format = FMT_H263;
698         s->h263_pred = 1;
699         s->unrestricted_mv = 1;
700         s->low_delay= s->max_b_frames ? 0 : 1;
701         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
702         break;
703     case CODEC_ID_MSMPEG4V1:
704         s->out_format = FMT_H263;
705         s->h263_msmpeg4 = 1;
706         s->h263_pred = 1;
707         s->unrestricted_mv = 1;
708         s->msmpeg4_version= 1;
709         avctx->delay=0;
710         s->low_delay=1;
711         break;
712     case CODEC_ID_MSMPEG4V2:
713         s->out_format = FMT_H263;
714         s->h263_msmpeg4 = 1;
715         s->h263_pred = 1;
716         s->unrestricted_mv = 1;
717         s->msmpeg4_version= 2;
718         avctx->delay=0;
719         s->low_delay=1;
720         break;
721     case CODEC_ID_MSMPEG4V3:
722         s->out_format = FMT_H263;
723         s->h263_msmpeg4 = 1;
724         s->h263_pred = 1;
725         s->unrestricted_mv = 1;
726         s->msmpeg4_version= 3;
727         s->flipflop_rounding=1;
728         avctx->delay=0;
729         s->low_delay=1;
730         break;
731     case CODEC_ID_WMV1:
732         s->out_format = FMT_H263;
733         s->h263_msmpeg4 = 1;
734         s->h263_pred = 1;
735         s->unrestricted_mv = 1;
736         s->msmpeg4_version= 4;
737         s->flipflop_rounding=1;
738         avctx->delay=0;
739         s->low_delay=1;
740         break;
741     case CODEC_ID_WMV2:
742         s->out_format = FMT_H263;
743         s->h263_msmpeg4 = 1;
744         s->h263_pred = 1;
745         s->unrestricted_mv = 1;
746         s->msmpeg4_version= 5;
747         s->flipflop_rounding=1;
748         avctx->delay=0;
749         s->low_delay=1;
750         break;
751 #endif
752     default:
753         return -1;
754     }
755     
756     { /* set up some safe defaults; some codecs might override them later */
757         static int done=0;
758         if(!done){
759             int i;
760             done=1;
761
762             default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
763             memset(default_mv_penalty, 0, sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
764             memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
765
766             for(i=-16; i<16; i++){
767                 default_fcode_tab[i + MAX_MV]= 1;
768             }
769         }
770     }
771     s->me.mv_penalty= default_mv_penalty;
772     s->fcode_tab= default_fcode_tab;
773     s->y_dc_scale_table=
774     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
775  
776     /* don't use the mv_penalty table for crap MVs as it would be confused */
777     //FIXME remove after fixing / removing old ME
778     if (s->me_method < ME_EPZS) s->me.mv_penalty = default_mv_penalty;
779
780     s->encoding = 1;
781
782     /* init */
783     if (MPV_common_init(s) < 0)
784         return -1;
785     
786     ff_init_me(s);
787
788 #ifdef CONFIG_ENCODERS
789 #ifdef CONFIG_RISKY
790     if (s->out_format == FMT_H263)
791         h263_encode_init(s);
792     if(s->msmpeg4_version)
793         ff_msmpeg4_encode_init(s);
794 #endif
795     if (s->out_format == FMT_MPEG1)
796         ff_mpeg1_encode_init(s);
797 #endif
798
799     /* init default q matrix */
800     for(i=0;i<64;i++) {
801         int j= s->dsp.idct_permutation[i];
802 #ifdef CONFIG_RISKY
803         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
804             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
805             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
806         }else if(s->out_format == FMT_H263){
807             s->intra_matrix[j] =
808             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
809         }else
810 #endif
811         { /* mpeg1 */
812             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
813             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
814         }
815     }
816
817     /* precompute matrix */
818     /* for mjpeg, we do include qscale in the matrix */
819     if (s->out_format != FMT_MJPEG) {
820         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, 
821                        s->intra_matrix, s->intra_quant_bias, 1, 31);
822         convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, 
823                        s->inter_matrix, s->inter_quant_bias, 1, 31);
824     }
825
826     if(ff_rate_control_init(s) < 0)
827         return -1;
828
829     s->picture_number = 0;
830     s->picture_in_gop_number = 0;
831     s->fake_picture_number = 0;
832     /* motion detector init */
833     s->f_code = 1;
834     s->b_code = 1;
835
836     return 0;
837 }
838
839 int MPV_encode_end(AVCodecContext *avctx)
840 {
841     MpegEncContext *s = avctx->priv_data;
842
843 #ifdef STATS
844     print_stats();
845 #endif
846
847     ff_rate_control_uninit(s);
848
849     MPV_common_end(s);
850     if (s->out_format == FMT_MJPEG)
851         mjpeg_close(s);
852       
853     return 0;
854 }
855
856 #endif //CONFIG_ENCODERS
857
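/* Builds the derived lookup tables of an RLTable from table_run/table_level:
 * max_level[run], max_run[level] and index_run[run], computed separately for
 * the "not last" and "last" halves of the code table. */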
858 void init_rl(RLTable *rl)
859 {
860     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
861     uint8_t index_run[MAX_RUN+1];
862     int last, run, level, start, end, i;
863
864     /* compute max_level[], max_run[] and index_run[] */
865     for(last=0;last<2;last++) {
866         if (last == 0) {
867             start = 0;
868             end = rl->last;
869         } else {
870             start = rl->last;
871             end = rl->n;
872         }
873
874         memset(max_level, 0, MAX_RUN + 1);
875         memset(max_run, 0, MAX_LEVEL + 1);
876         memset(index_run, rl->n, MAX_RUN + 1);
877         for(i=start;i<end;i++) {
878             run = rl->table_run[i];
879             level = rl->table_level[i];
880             if (index_run[run] == rl->n)
881                 index_run[run] = i;
882             if (level > max_level[run])
883                 max_level[run] = level;
884             if (run > max_run[level])
885                 max_run[level] = run;
886         }
887         rl->max_level[last] = av_malloc(MAX_RUN + 1);
888         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
889         rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
890         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
891         rl->index_run[last] = av_malloc(MAX_RUN + 1);
892         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
893     }
894 }
895
896 /* draw the edges of width 'w' of an image of size width, height */
897 //FIXME check that this is ok for mpeg4 interlaced
898 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
899 {
900     uint8_t *ptr, *last_line;
901     int i;
902
903     last_line = buf + (height - 1) * wrap;
904     for(i=0;i<w;i++) {
905         /* top and bottom */
906         memcpy(buf - (i + 1) * wrap, buf, width);
907         memcpy(last_line + (i + 1) * wrap, last_line, width);
908     }
909     /* left and right */
910     ptr = buf;
911     for(i=0;i<height;i++) {
912         memset(ptr - w, ptr[0], w);
913         memset(ptr + width, ptr[width-1], w);
914         ptr += wrap;
915     }
916     /* corners */
917     for(i=0;i<w;i++) {
918         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
919         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
920         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
921         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
922     }
923 }
924
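/* Returns the index of a free slot in s->picture[]: for shared pictures only
 * completely unused entries (type==0) qualify; otherwise entries that already
 * have a buffer type assigned are tried first, then any empty slot. */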
925 static int find_unused_picture(MpegEncContext *s, int shared){
926     int i;
927     
928     if(shared){
929         for(i=0; i<MAX_PICTURE_COUNT; i++){
930             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) break;
931         }
932     }else{
933         for(i=0; i<MAX_PICTURE_COUNT; i++){
934             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break; //FIXME
935         }
936         for(i=0; i<MAX_PICTURE_COUNT; i++){
937             if(s->picture[i].data[0]==NULL) break;
938         }
939     }
940
941     assert(i<MAX_PICTURE_COUNT);
942     return i;
943 }
944
945 /* generic function for encode/decode called before a frame is coded/decoded */
946 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
947 {
948     int i;
949     AVFrame *pic;
950
951     s->mb_skiped = 0;
952
953     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
954
955     /* mark&release old frames */
956     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr->data[0]) {
957         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
958
959         /* release forgotten pictures */
960         /* if(mpeg124/h263) */
961         if(!s->encoding){
962             for(i=0; i<MAX_PICTURE_COUNT; i++){
963                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
964                     fprintf(stderr, "releasing zombie picture\n");
965                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
966                 }
967             }
968         }
969     }
970 alloc:
971     if(!s->encoding){
972         /* release non-reference frames */
973         for(i=0; i<MAX_PICTURE_COUNT; i++){
974             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
975                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
976             }
977         }
978
979         i= find_unused_picture(s, 0);
980     
981         pic= (AVFrame*)&s->picture[i];
982         pic->reference= s->pict_type != B_TYPE ? 3 : 0;
983
984         if(s->current_picture_ptr)
985             pic->coded_picture_number= s->current_picture_ptr->coded_picture_number+1;
986         
987         if( alloc_picture(s, (Picture*)pic, 0) < 0)
988             return -1;
989
990         s->current_picture_ptr= &s->picture[i];
991     }
992
993     s->current_picture_ptr->pict_type= s->pict_type;
994     s->current_picture_ptr->quality= s->qscale;
995     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
996
997     s->current_picture= *s->current_picture_ptr;
998   
999   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1000     if (s->pict_type != B_TYPE) {
1001         s->last_picture_ptr= s->next_picture_ptr;
1002         s->next_picture_ptr= s->current_picture_ptr;
1003     }
1004     
1005     if(s->last_picture_ptr) s->last_picture= *s->last_picture_ptr;
1006     if(s->next_picture_ptr) s->next_picture= *s->next_picture_ptr;
1007     if(s->new_picture_ptr ) s->new_picture = *s->new_picture_ptr;
1008     
1009     if(s->pict_type != I_TYPE && s->last_picture_ptr==NULL){
1010         fprintf(stderr, "warning: first frame is not a keyframe\n");
1011         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1012         goto alloc;
1013     }
1014
1015     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1016
1017     if(s->picture_structure!=PICT_FRAME){
1018         int i;
1019         for(i=0; i<4; i++){
1020             if(s->picture_structure == PICT_BOTTOM_FIELD){
1021                  s->current_picture.data[i] += s->current_picture.linesize[i];
1022             } 
1023             s->current_picture.linesize[i] *= 2;
1024             s->last_picture.linesize[i] *=2;
1025             s->next_picture.linesize[i] *=2;
1026         }
1027     }
1028   }
1029    
1030     s->hurry_up= s->avctx->hurry_up;
1031     s->error_resilience= avctx->error_resilience;
1032
1033     /* set dequantizer, we can't do it during init as it might change for mpeg4
1034        and we can't do it in the header decode as init isn't called for mpeg4 there yet */
1035     if(s->out_format == FMT_H263){
1036         if(s->mpeg_quant)
1037             s->dct_unquantize = s->dct_unquantize_mpeg2;
1038         else
1039             s->dct_unquantize = s->dct_unquantize_h263;
1040     }else 
1041         s->dct_unquantize = s->dct_unquantize_mpeg1;
1042
1043 #ifdef HAVE_XVMC
1044     if(s->avctx->xvmc_acceleration)
1045         return XVMC_field_start(s, avctx);
1046 #endif
1047     return 0;
1048 }
1049
1050 /* generic function for encode/decode called after a frame has been coded/decoded */
1051 void MPV_frame_end(MpegEncContext *s)
1052 {
1053     int i;
1054     /* draw edge for correct motion prediction if outside */
1055 #ifdef HAVE_XVMC
1056 //just to make sure that all data is rendered.
1057     if(s->avctx->xvmc_acceleration){
1058         XVMC_field_end(s);
1059     }else
1060 #endif
1061     if(s->codec_id!=CODEC_ID_SVQ1 && s->codec_id != CODEC_ID_MPEG1VIDEO){
1062         if (s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1063             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1064             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1065             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1066         }
1067     }
1068     emms_c();
1069     
1070     s->last_pict_type    = s->pict_type;
1071     if(s->pict_type!=B_TYPE){
1072         s->last_non_b_pict_type= s->pict_type;
1073     }
1074 #if 0
1075         /* copy back current_picture variables */
1076     for(i=0; i<MAX_PICTURE_COUNT; i++){
1077         if(s->picture[i].data[0] == s->current_picture.data[0]){
1078             s->picture[i]= s->current_picture;
1079             break;
1080         }    
1081     }
1082     assert(i<MAX_PICTURE_COUNT);
1083 #endif    
1084
1085     if(s->encoding){
1086         /* release non-reference frames */
1087         for(i=0; i<MAX_PICTURE_COUNT; i++){
1088             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1089                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1090             }
1091         }
1092     }
1093     // clear copies, to avoid confusion
1094 #if 0
1095     memset(&s->last_picture, 0, sizeof(Picture));
1096     memset(&s->next_picture, 0, sizeof(Picture));
1097     memset(&s->current_picture, 0, sizeof(Picture));
1098 #endif
1099 }
1100
1101 /**
1102  * draws a line from (ex, ey) -> (sx, sy).
1103  * @param w width of the image
1104  * @param h height of the image
1105  * @param stride stride/linesize of the image
1106  * @param color color of the line
1107  */
1108 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1109     int t, x, y, f;
1110     
1111     sx= clip(sx, 0, w-1);
1112     sy= clip(sy, 0, h-1);
1113     ex= clip(ex, 0, w-1);
1114     ey= clip(ey, 0, h-1);
1115     
1116     buf[sy*stride + sx]+= color;
1117     
1118     if(ABS(ex - sx) > ABS(ey - sy)){
1119         if(sx > ex){
1120             t=sx; sx=ex; ex=t;
1121             t=sy; sy=ey; ey=t;
1122         }
1123         buf+= sx + sy*stride;
1124         ex-= sx;
1125         f= ((ey-sy)<<16)/ex;
1126         for(x= 0; x <= ex; x++){
1127             y= ((x*f) + (1<<15))>>16;
1128             buf[y*stride + x]+= color;
1129         }
1130     }else{
1131         if(sy > ey){
1132             t=sx; sx=ex; ex=t;
1133             t=sy; sy=ey; ey=t;
1134         }
1135         buf+= sx + sy*stride;
1136         ey-= sy;
1137         if(ey) f= ((ex-sx)<<16)/ey;
1138         else   f= 0;
1139         for(y= 0; y <= ey; y++){
1140             x= ((y*f) + (1<<15))>>16;
1141             buf[y*stride + x]+= color;
1142         }
1143     }
1144 }
1145
1146 /**
1147  * draws an arrow from (ex, ey) -> (sx, sy).
1148  * @param w width of the image
1149  * @param h height of the image
1150  * @param stride stride/linesize of the image
1151  * @param color color of the arrow
1152  */
1153 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ 
1154     int dx,dy;
1155
1156     sx= clip(sx, -100, w+100);
1157     sy= clip(sy, -100, h+100);
1158     ex= clip(ex, -100, w+100);
1159     ey= clip(ey, -100, h+100);
1160     
1161     dx= ex - sx;
1162     dy= ey - sy;
1163     
1164     if(dx*dx + dy*dy > 3*3){
1165         int rx=  dx + dy;
1166         int ry= -dx + dy;
1167         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1168         
1169         //FIXME subpixel accuracy
1170         rx= ROUNDED_DIV(rx*3<<4, length);
1171         ry= ROUNDED_DIV(ry*3<<4, length);
1172         
1173         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1174         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1175     }
1176     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1177 }
1178
1179 /**
1180  * prints debugging info for the given picture.
1181  */
1182 void ff_print_debug_info(MpegEncContext *s, Picture *pict){
1183
1184     if(!pict || !pict->mb_type) return;
1185
1186     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1187         int x,y;
1188
1189         for(y=0; y<s->mb_height; y++){
1190             for(x=0; x<s->mb_width; x++){
1191                 if(s->avctx->debug&FF_DEBUG_SKIP){
1192                     int count= s->mbskip_table[x + y*s->mb_stride];
1193                     if(count>9) count=9;
1194                     printf("%1d", count);
1195                 }
1196                 if(s->avctx->debug&FF_DEBUG_QP){
1197                     printf("%2d", pict->qscale_table[x + y*s->mb_stride]);
1198                 }
1199                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1200                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1201                     
1202                     //Type & MV direction
1203                     if(IS_PCM(mb_type))
1204                         printf("P");
1205                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1206                         printf("A");
1207                     else if(IS_INTRA4x4(mb_type))
1208                         printf("i");
1209                     else if(IS_INTRA16x16(mb_type))
1210                         printf("I");
1211                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1212                         printf("d");
1213                     else if(IS_DIRECT(mb_type))
1214                         printf("D");
1215                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1216                         printf("g");
1217                     else if(IS_GMC(mb_type))
1218                         printf("G");
1219                     else if(IS_SKIP(mb_type))
1220                         printf("S");
1221                     else if(!USES_LIST(mb_type, 1))
1222                         printf(">");
1223                     else if(!USES_LIST(mb_type, 0))
1224                         printf("<");
1225                     else{
1226                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1227                         printf("X");
1228                     }
1229                     
1230                     //segmentation
1231                     if(IS_8X8(mb_type))
1232                         printf("+");
1233                     else if(IS_16X8(mb_type))
1234                         printf("-");
1235                     else if(IS_8X16(mb_type))
1236                         printf("¦");
1237                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1238                         printf(" ");
1239                     else
1240                         printf("?");
1241                     
1242                         
1243                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1244                         printf("=");
1245                     else
1246                         printf(" ");
1247                 }
1248 //                printf(" ");
1249             }
1250             printf("\n");
1251         }
1252     }
1253     
1254     if((s->avctx->debug&FF_DEBUG_VIS_MV) && s->motion_val){
1255         const int shift= 1 + s->quarter_sample;
1256         int mb_y;
1257         uint8_t *ptr= pict->data[0];
1258         s->low_delay=0; //needed to see the vectors without trashing the buffers
1259
1260         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1261             int mb_x;
1262             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1263                 const int mb_index= mb_x + mb_y*s->mb_stride;
1264                 if(IS_8X8(s->current_picture.mb_type[mb_index])){
1265                     int i;
1266                     for(i=0; i<4; i++){
1267                         int sx= mb_x*16 + 4 + 8*(i&1);
1268                         int sy= mb_y*16 + 4 + 8*(i>>1);
1269                         int xy= 1 + mb_x*2 + (i&1) + (mb_y*2 + 1 + (i>>1))*(s->mb_width*2 + 2);
1270                         int mx= (s->motion_val[xy][0]>>shift) + sx;
1271                         int my= (s->motion_val[xy][1]>>shift) + sy;
1272                         draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1273                     }
1274                 }else{
1275                     int sx= mb_x*16 + 8;
1276                     int sy= mb_y*16 + 8;
1277                     int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2);
1278                     int mx= (s->motion_val[xy][0]>>shift) + sx;
1279                     int my= (s->motion_val[xy][1]>>shift) + sy;
1280                     draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1281                 }
1282                 s->mbskip_table[mb_index]=0;
1283             }
1284         }
1285     }
1286 }
1287
1288 #ifdef CONFIG_ENCODERS
1289
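/* Sum of absolute differences of a 16x16 block against a constant reference
 * value, i.e. how far the block is from being flat. */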
1290 static int get_sae(uint8_t *src, int ref, int stride){
1291     int x,y;
1292     int acc=0;
1293     
1294     for(y=0; y<16; y++){
1295         for(x=0; x<16; x++){
1296             acc+= ABS(src[x+y*stride] - ref);
1297         }
1298     }
1299     
1300     return acc;
1301 }
1302
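/* Counts the 16x16 blocks for which coding against their own mean (SAE + 500)
 * would beat inter prediction against ref (SAD); used below by
 * b_frame_strategy==1 as a rough intra-cost estimate when deciding how many
 * B-frames to insert. */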
1303 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1304     int x, y, w, h;
1305     int acc=0;
1306     
1307     w= s->width &~15;
1308     h= s->height&~15;
1309     
1310     for(y=0; y<h; y+=16){
1311         for(x=0; x<w; x+=16){
1312             int offset= x + y*stride;
1313             int sad = s->dsp.pix_abs16x16(src + offset, ref + offset, stride);
1314             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1315             int sae = get_sae(src + offset, mean, stride);
1316             
1317             acc+= sae + 500 < sad;
1318         }
1319     }
1320     return acc;
1321 }
1322
1323
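/* Stores the user-supplied frame at position max_b_frames of the
 * s->input_picture[] FIFO. If the strides match and the caller preserves its
 * input, the buffer is referenced directly; otherwise the frame is copied into
 * an internal picture whose data pointers are offset by 16 pixels so the same
 * buffer can later be reused as the current picture. */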
1324 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
1325     AVFrame *pic=NULL;
1326     int i;
1327     const int encoding_delay= s->max_b_frames;
1328     int direct=1;
1329     
1330   if(pic_arg){
1331     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
1332     if(pic_arg->linesize[0] != s->linesize) direct=0;
1333     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
1334     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
1335   
1336 //    printf("%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
1337     
1338     if(direct){
1339         i= find_unused_picture(s, 1);
1340
1341         pic= (AVFrame*)&s->picture[i];
1342         pic->reference= 3;
1343     
1344         for(i=0; i<4; i++){
1345             pic->data[i]= pic_arg->data[i];
1346             pic->linesize[i]= pic_arg->linesize[i];
1347         }
1348         alloc_picture(s, (Picture*)pic, 1);
1349     }else{
1350         i= find_unused_picture(s, 0);
1351
1352         pic= (AVFrame*)&s->picture[i];
1353         pic->reference= 3;
1354
1355         alloc_picture(s, (Picture*)pic, 0);
1356         for(i=0; i<4; i++){
1357             /* the input will be 16 pixels to the right relative to the actual buffer start
1358              * and the current_pic, so the buffer can be reused, yes it's not beautiful 
1359              */
1360             pic->data[i]+= 16; 
1361         }
1362
1363         if(   pic->data[0] == pic_arg->data[0] 
1364            && pic->data[1] == pic_arg->data[1]
1365            && pic->data[2] == pic_arg->data[2]){
1366        // empty
1367         }else{
1368             int h_chroma_shift, v_chroma_shift;
1369             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1370         
1371             for(i=0; i<3; i++){
1372                 int src_stride= pic_arg->linesize[i];
1373                 int dst_stride= i ? s->uvlinesize : s->linesize;
1374                 int h_shift= i ? h_chroma_shift : 0;
1375                 int v_shift= i ? v_chroma_shift : 0;
1376                 int w= s->width >>h_shift;
1377                 int h= s->height>>v_shift;
1378                 uint8_t *src= pic_arg->data[i];
1379                 uint8_t *dst= pic->data[i];
1380             
1381                 if(src_stride==dst_stride)
1382                     memcpy(dst, src, src_stride*h);
1383                 else{
1384                     while(h--){
1385                         memcpy(dst, src, w);
1386                         dst += dst_stride;
1387                         src += src_stride;
1388                     }
1389                 }
1390             }
1391         }
1392     }
1393     pic->quality= pic_arg->quality;
1394     pic->pict_type= pic_arg->pict_type;
1395     pic->pts = pic_arg->pts;
1396     
1397     if(s->input_picture[encoding_delay])
1398         pic->display_picture_number= s->input_picture[encoding_delay]->display_picture_number + 1;
1399     
1400   }
1401
1402     /* shift buffer entries */
1403     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
1404         s->input_picture[i-1]= s->input_picture[i];
1405         
1406     s->input_picture[encoding_delay]= (Picture*)pic;
1407
1408     return 0;
1409 }
1410
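/* Picks the next frame(s) to code: decides how many of the queued input frames
 * become B-frames (or takes the types from the 2-pass log / the user), moves
 * them from input_picture[] to reordered_input_picture[] in coded order and
 * sets up new_picture / current_picture accordingly. */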
1411 static void select_input_picture(MpegEncContext *s){
1412     int i;
1413     const int encoding_delay= s->max_b_frames;
1414     int coded_pic_num=0;    
1415
1416     if(s->reordered_input_picture[0])
1417         coded_pic_num= s->reordered_input_picture[0]->coded_picture_number + 1;
1418
1419     for(i=1; i<MAX_PICTURE_COUNT; i++)
1420         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
1421     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
1422
1423     /* set next picture types & ordering */
1424     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
1425         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
1426             s->reordered_input_picture[0]= s->input_picture[0];
1427             s->reordered_input_picture[0]->pict_type= I_TYPE;
1428             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1429         }else{
1430             int b_frames;
1431             
1432             if(s->flags&CODEC_FLAG_PASS2){
1433                 for(i=0; i<s->max_b_frames+1; i++){
1434                     int pict_num= s->input_picture[0]->display_picture_number + i;
1435                     int pict_type= s->rc_context.entry[pict_num].new_pict_type;
1436                     s->input_picture[i]->pict_type= pict_type;
1437                     
1438                     if(i + 1 >= s->rc_context.num_entries) break;
1439                 }
1440             }
1441
1442             if(s->input_picture[0]->pict_type){
1443                 /* user selected pict_type */
1444                 for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){
1445                     if(s->input_picture[b_frames]->pict_type!=B_TYPE) break;
1446                 }
1447             
1448                 if(b_frames > s->max_b_frames){
1449                     fprintf(stderr, "warning, too many bframes in a row\n");
1450                     b_frames = s->max_b_frames;
1451                 }
1452             }else if(s->b_frame_strategy==0){
1453                 b_frames= s->max_b_frames;
1454                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
1455             }else if(s->b_frame_strategy==1){
1456                 for(i=1; i<s->max_b_frames+1; i++){
1457                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
1458                         s->input_picture[i]->b_frame_score= 
1459                             get_intra_count(s, s->input_picture[i  ]->data[0], 
1460                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
1461                     }
1462                 }
1463                 for(i=0; i<s->max_b_frames; i++){
1464                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
1465                 }
1466                                 
1467                 b_frames= FFMAX(0, i-1);
1468                 
1469                 /* reset scores */
1470                 for(i=0; i<b_frames+1; i++){
1471                     s->input_picture[i]->b_frame_score=0;
1472                 }
1473             }else{
1474                 fprintf(stderr, "illegal b frame strategy\n");
1475                 b_frames=0;
1476             }
1477
1478             emms_c();
1479 //static int b_count=0;
1480 //b_count+= b_frames;
1481 //printf("b_frames: %d\n", b_count);
1482                         
1483             s->reordered_input_picture[0]= s->input_picture[b_frames];
1484             if(   s->picture_in_gop_number + b_frames >= s->gop_size 
1485                || s->reordered_input_picture[0]->pict_type== I_TYPE)
1486                 s->reordered_input_picture[0]->pict_type= I_TYPE;
1487             else
1488                 s->reordered_input_picture[0]->pict_type= P_TYPE;
1489             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1490             for(i=0; i<b_frames; i++){
1491                 coded_pic_num++;
1492                 s->reordered_input_picture[i+1]= s->input_picture[i];
1493                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
1494                 s->reordered_input_picture[i+1]->coded_picture_number= coded_pic_num;
1495             }
1496         }
1497     }
1498     
1499     if(s->reordered_input_picture[0]){
1500         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
1501
1502         s->new_picture= *s->reordered_input_picture[0];
1503
1504         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
1505             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
1506         
1507             int i= find_unused_picture(s, 0);
1508             Picture *pic= &s->picture[i];
1509
1510             /* mark us unused / free shared pic */
1511             for(i=0; i<4; i++)
1512                 s->reordered_input_picture[0]->data[i]= NULL;
1513             s->reordered_input_picture[0]->type= 0;
1514             
1515             //FIXME bad, copy * except
1516             pic->pict_type = s->reordered_input_picture[0]->pict_type;
1517             pic->quality   = s->reordered_input_picture[0]->quality;
1518             pic->coded_picture_number = s->reordered_input_picture[0]->coded_picture_number;
1519             pic->reference = s->reordered_input_picture[0]->reference;
1520             pic->pts = s->reordered_input_picture[0]->pts;
1521             
1522             alloc_picture(s, pic, 0);
1523
1524             s->current_picture_ptr= pic;
1525         }else{
1526             // input is not a shared pix -> reuse buffer for current_pix
1527
1528             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
1529                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
1530             
1531             s->current_picture_ptr= s->reordered_input_picture[0];
1532             for(i=0; i<4; i++){
1533                 //reverse the +16 we did before storing the input
1534                 s->current_picture_ptr->data[i]-=16;
1535             }
1536         }
1537         s->current_picture= *s->current_picture_ptr;
1538     
1539         s->picture_number= s->new_picture.display_picture_number;
1540 //printf("dpn:%d\n", s->picture_number);
1541     }else{
1542        memset(&s->new_picture, 0, sizeof(Picture));
1543     }
1544 }
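
/* Illustrative example of the reordering performed above (a sketch, ignoring
 * GOP boundaries and the two-pass / adaptive B-frame strategies): with
 * max_b_frames=2 and b_frame_strategy=0, pictures arriving in display order
 *     0 1 2 3 4 5 6 ...
 * leave the encoder in coded order
 *     I0  P3 B1 B2  P6 B4 B5 ...
 * i.e. each P (or I) picture is coded before the B pictures that precede it in
 * display order, which is why the output lags the input by
 * encoding_delay = max_b_frames frames. */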
1545
1546 int MPV_encode_picture(AVCodecContext *avctx,
1547                        unsigned char *buf, int buf_size, void *data)
1548 {
1549     MpegEncContext *s = avctx->priv_data;
1550     AVFrame *pic_arg = data;
1551     int i;
1552
1553     if(avctx->pix_fmt != PIX_FMT_YUV420P){
1554         fprintf(stderr, "this codec supports only YUV420P\n");
1555         return -1;
1556     }
1557     
1558     init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
1559
1560     s->picture_in_gop_number++;
1561
1562     load_input_picture(s, pic_arg);
1563     
1564     select_input_picture(s);
1565     
1566     /* output? */
1567     if(s->new_picture.data[0]){
1568
1569         s->pict_type= s->new_picture.pict_type;
1570         if (s->fixed_qscale){ /* the ratecontrol needs the last qscale so we don't touch it for CBR */
1571             s->qscale= (int)(s->new_picture.quality+0.5);
1572             assert(s->qscale);
1573         }
1574 //emms_c();
1575 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
1576         MPV_frame_start(s, avctx);
1577
1578         encode_picture(s, s->picture_number);
1579         
1580         avctx->real_pict_num  = s->picture_number;
1581         avctx->header_bits = s->header_bits;
1582         avctx->mv_bits     = s->mv_bits;
1583         avctx->misc_bits   = s->misc_bits;
1584         avctx->i_tex_bits  = s->i_tex_bits;
1585         avctx->p_tex_bits  = s->p_tex_bits;
1586         avctx->i_count     = s->i_count;
1587         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
1588         avctx->skip_count  = s->skip_count;
1589
1590         MPV_frame_end(s);
1591
1592         if (s->out_format == FMT_MJPEG)
1593             mjpeg_picture_trailer(s);
1594         
1595         if(s->flags&CODEC_FLAG_PASS1)
1596             ff_write_pass1_stats(s);
1597
1598         for(i=0; i<4; i++){
1599             avctx->error[i] += s->current_picture_ptr->error[i];
1600         }
1601     }
1602
1603     s->input_picture_number++;
1604
1605     flush_put_bits(&s->pb);
1606     s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
1607     
1608     s->total_bits += s->frame_bits;
1609     avctx->frame_bits  = s->frame_bits;
1610     
1611     return pbBufPtr(&s->pb) - s->pb.buf;
1612 }
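
/* A minimal usage sketch (illustrative only): applications normally reach this
 * function through avcodec_encode_video(); the names 'outbuf', 'outbuf_size'
 * and 'frame' are placeholders, and 'frame' must be a YUV420P AVFrame as
 * checked at the top of MPV_encode_picture(). */
#if 0
    int out_size= avcodec_encode_video(avctx, outbuf, outbuf_size, frame);
    /* out_size is the number of bytes written, i.e. what is returned above as
       pbBufPtr(&s->pb) - s->pb.buf */
#endif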
1613
1614 #endif //CONFIG_ENCODERS
1615
1616 static inline void gmc1_motion(MpegEncContext *s,
1617                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1618                                int dest_offset,
1619                                uint8_t **ref_picture, int src_offset)
1620 {
1621     uint8_t *ptr;
1622     int offset, src_x, src_y, linesize, uvlinesize;
1623     int motion_x, motion_y;
1624     int emu=0;
1625
1626     motion_x= s->sprite_offset[0][0];
1627     motion_y= s->sprite_offset[0][1];
1628     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
1629     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
1630     motion_x<<=(3-s->sprite_warping_accuracy);
1631     motion_y<<=(3-s->sprite_warping_accuracy);
1632     src_x = clip(src_x, -16, s->width);
1633     if (src_x == s->width)
1634         motion_x =0;
1635     src_y = clip(src_y, -16, s->height);
1636     if (src_y == s->height)
1637         motion_y =0;
1638
1639     linesize = s->linesize;
1640     uvlinesize = s->uvlinesize;
1641     
1642     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1643
1644     dest_y+=dest_offset;
1645     if(s->flags&CODEC_FLAG_EMU_EDGE){
1646         if(src_x<0 || src_y<0 || src_x + 17 >= s->h_edge_pos
1647                               || src_y + 17 >= s->v_edge_pos){
1648             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1649             ptr= s->edge_emu_buffer;
1650         }
1651     }
1652     
1653     if((motion_x|motion_y)&7){
1654         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1655         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1656     }else{
1657         int dxy;
1658         
1659         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
1660         if (s->no_rounding){
1661             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
1662         }else{
1663             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
1664         }
1665     }
1666     
1667     if(s->flags&CODEC_FLAG_GRAY) return;
1668
1669     motion_x= s->sprite_offset[1][0];
1670     motion_y= s->sprite_offset[1][1];
1671     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
1672     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
1673     motion_x<<=(3-s->sprite_warping_accuracy);
1674     motion_y<<=(3-s->sprite_warping_accuracy);
1675     src_x = clip(src_x, -8, s->width>>1);
1676     if (src_x == s->width>>1)
1677         motion_x =0;
1678     src_y = clip(src_y, -8, s->height>>1);
1679     if (src_y == s->height>>1)
1680         motion_y =0;
1681
1682     offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
1683     ptr = ref_picture[1] + offset;
1684     if(s->flags&CODEC_FLAG_EMU_EDGE){
1685         if(src_x<0 || src_y<0 || src_x + 9 >= s->h_edge_pos>>1
1686                               || src_y + 9 >= s->v_edge_pos>>1){
1687             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1688             ptr= s->edge_emu_buffer;
1689             emu=1;
1690         }
1691     }
1692     s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1693     
1694     ptr = ref_picture[2] + offset;
1695     if(emu){
1696         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1697         ptr= s->edge_emu_buffer;
1698     }
1699     s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1700     
1701     return;
1702 }
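
/* gmc1_motion() above handles the single warping point case of MPEG4 GMC: the
 * whole macroblock is shifted by sprite_offset with up to 1/16 pel accuracy.
 * The fractional part (motion_x&15, motion_y&15) is interpolated by
 * s->dsp.gmc1(); when both fractional parts are multiples of 8 the code falls
 * back to the ordinary (no_rnd_)put_pixels functions, which is cheaper. */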
1703
1704 static inline void gmc_motion(MpegEncContext *s,
1705                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1706                                int dest_offset,
1707                                uint8_t **ref_picture, int src_offset)
1708 {
1709     uint8_t *ptr;
1710     int linesize, uvlinesize;
1711     const int a= s->sprite_warping_accuracy;
1712     int ox, oy;
1713
1714     linesize = s->linesize;
1715     uvlinesize = s->uvlinesize;
1716
1717     ptr = ref_picture[0] + src_offset;
1718
1719     dest_y+=dest_offset;
1720     
1721     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
1722     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
1723
1724     s->dsp.gmc(dest_y, ptr, linesize, 16,
1725            ox, 
1726            oy, 
1727            s->sprite_delta[0][0], s->sprite_delta[0][1],
1728            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1729            a+1, (1<<(2*a+1)) - s->no_rounding,
1730            s->h_edge_pos, s->v_edge_pos);
1731     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
1732            ox + s->sprite_delta[0][0]*8, 
1733            oy + s->sprite_delta[1][0]*8, 
1734            s->sprite_delta[0][0], s->sprite_delta[0][1],
1735            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1736            a+1, (1<<(2*a+1)) - s->no_rounding,
1737            s->h_edge_pos, s->v_edge_pos);
1738
1739     if(s->flags&CODEC_FLAG_GRAY) return;
1740
1741
1742     dest_cb+=dest_offset>>1;
1743     dest_cr+=dest_offset>>1;
1744     
1745     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
1746     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
1747
1748     ptr = ref_picture[1] + (src_offset>>1);
1749     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
1750            ox, 
1751            oy, 
1752            s->sprite_delta[0][0], s->sprite_delta[0][1],
1753            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1754            a+1, (1<<(2*a+1)) - s->no_rounding,
1755            s->h_edge_pos>>1, s->v_edge_pos>>1);
1756     
1757     ptr = ref_picture[2] + (src_offset>>1);
1758     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
1759            ox, 
1760            oy, 
1761            s->sprite_delta[0][0], s->sprite_delta[0][1],
1762            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1763            a+1, (1<<(2*a+1)) - s->no_rounding,
1764            s->h_edge_pos>>1, s->v_edge_pos>>1);
1765 }
1766
1767 /**
1768  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
1769  * @param buf destination buffer
1770  * @param src source buffer
1771  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
1772  * @param block_w width of block
1773  * @param block_h height of block
1774  * @param src_x x coordinate of the top left sample of the block in the source buffer
1775  * @param src_y y coordinate of the top left sample of the block in the source buffer
1776  * @param w width of the source buffer
1777  * @param h height of the source buffer
1778  */
1779 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 
1780                                     int src_x, int src_y, int w, int h){
1781     int x, y;
1782     int start_y, start_x, end_y, end_x;
1783
1784     if(src_y>= h){
1785         src+= (h-1-src_y)*linesize;
1786         src_y=h-1;
1787     }else if(src_y<=-block_h){
1788         src+= (1-block_h-src_y)*linesize;
1789         src_y=1-block_h;
1790     }
1791     if(src_x>= w){
1792         src+= (w-1-src_x);
1793         src_x=w-1;
1794     }else if(src_x<=-block_w){
1795         src+= (1-block_w-src_x);
1796         src_x=1-block_w;
1797     }
1798
1799     start_y= FFMAX(0, -src_y);
1800     start_x= FFMAX(0, -src_x);
1801     end_y= FFMIN(block_h, h-src_y);
1802     end_x= FFMIN(block_w, w-src_x);
1803
1804     // copy existing part
1805     for(y=start_y; y<end_y; y++){
1806         for(x=start_x; x<end_x; x++){
1807             buf[x + y*linesize]= src[x + y*linesize];
1808         }
1809     }
1810
1811     //top
1812     for(y=0; y<start_y; y++){
1813         for(x=start_x; x<end_x; x++){
1814             buf[x + y*linesize]= buf[x + start_y*linesize];
1815         }
1816     }
1817
1818     //bottom
1819     for(y=end_y; y<block_h; y++){
1820         for(x=start_x; x<end_x; x++){
1821             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
1822         }
1823     }
1824                                     
1825     for(y=0; y<block_h; y++){
1826        //left
1827         for(x=0; x<start_x; x++){
1828             buf[x + y*linesize]= buf[start_x + y*linesize];
1829         }
1830        
1831        //right
1832         for(x=end_x; x<block_w; x++){
1833             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
1834         }
1835     }
1836 }
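
/* Typical use of ff_emulated_edge_mc() (a sketch mirroring the callers above,
 * e.g. gmc1_motion(); 'ptr', 'src_x' and 'src_y' come from the surrounding MC
 * code): when a 17x17 halfpel source block may reach outside the picture, it
 * is first copied into edge_emu_buffer with the border samples replicated, and
 * the motion compensation then reads from that padded copy. */
#if 0
    if(src_x<0 || src_y<0 || src_x + 17 >= s->h_edge_pos
                          || src_y + 17 >= s->v_edge_pos){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 17, 17,
                            src_x, src_y, s->h_edge_pos, s->v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
#endif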
1837
1838
1839 /* apply one mpeg motion vector to the three components */
1840 static inline void mpeg_motion(MpegEncContext *s,
1841                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1842                                int dest_offset,
1843                                uint8_t **ref_picture, int src_offset,
1844                                int field_based, op_pixels_func (*pix_op)[4],
1845                                int motion_x, int motion_y, int h)
1846 {
1847     uint8_t *ptr;
1848     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1849     int emu=0;
1850 #if 0    
1851 if(s->quarter_sample)
1852 {
1853     motion_x>>=1;
1854     motion_y>>=1;
1855 }
1856 #endif
1857     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
1858     src_x = s->mb_x * 16 + (motion_x >> 1);
1859     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
1860                 
1861     /* WARNING: do not forget half pels */
1862     height = s->height >> field_based;
1863     v_edge_pos = s->v_edge_pos >> field_based;
1864     src_x = clip(src_x, -16, s->width);
1865     if (src_x == s->width)
1866         dxy &= ~1;
1867     src_y = clip(src_y, -16, height);
1868     if (src_y == height)
1869         dxy &= ~2;
1870     linesize   = s->current_picture.linesize[0] << field_based;
1871     uvlinesize = s->current_picture.linesize[1] << field_based;
1872     ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
1873     dest_y += dest_offset;
1874
1875     if(s->flags&CODEC_FLAG_EMU_EDGE){
1876         if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
1877                               || src_y + (motion_y&1) + h  > v_edge_pos){
1878             ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based,  //FIXME linesize? and uv below
1879                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1880             ptr= s->edge_emu_buffer + src_offset;
1881             emu=1;
1882         }
1883     }
1884     pix_op[0][dxy](dest_y, ptr, linesize, h);
1885
1886     if(s->flags&CODEC_FLAG_GRAY) return;
1887
1888     if (s->out_format == FMT_H263) {
1889         dxy = 0;
1890         if ((motion_x & 3) != 0)
1891             dxy |= 1;
1892         if ((motion_y & 3) != 0)
1893             dxy |= 2;
1894         mx = motion_x >> 2;
1895         my = motion_y >> 2;
1896     } else {
1897         mx = motion_x / 2;
1898         my = motion_y / 2;
1899         dxy = ((my & 1) << 1) | (mx & 1);
1900         mx >>= 1;
1901         my >>= 1;
1902     }
1903     
1904     src_x = s->mb_x * 8 + mx;
1905     src_y = s->mb_y * (8 >> field_based) + my;
1906     src_x = clip(src_x, -8, s->width >> 1);
1907     if (src_x == (s->width >> 1))
1908         dxy &= ~1;
1909     src_y = clip(src_y, -8, height >> 1);
1910     if (src_y == (height >> 1))
1911         dxy &= ~2;
1912     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1913     ptr = ref_picture[1] + offset;
1914     if(emu){
1915         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1916                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1917         ptr= s->edge_emu_buffer + (src_offset >> 1);
1918     }
1919     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1920
1921     ptr = ref_picture[2] + offset;
1922     if(emu){
1923         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1924                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1925         ptr= s->edge_emu_buffer + (src_offset >> 1);
1926     }
1927     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1928 }
1929
1930 static inline void qpel_motion(MpegEncContext *s,
1931                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1932                                int dest_offset,
1933                                uint8_t **ref_picture, int src_offset,
1934                                int field_based, op_pixels_func (*pix_op)[4],
1935                                qpel_mc_func (*qpix_op)[16],
1936                                int motion_x, int motion_y, int h)
1937 {
1938     uint8_t *ptr;
1939     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1940     int emu=0;
1941
1942     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
1943     src_x = s->mb_x * 16 + (motion_x >> 2);
1944     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
1945
1946     height = s->height >> field_based;
1947     v_edge_pos = s->v_edge_pos >> field_based;
1948     src_x = clip(src_x, -16, s->width);
1949     if (src_x == s->width)
1950         dxy &= ~3;
1951     src_y = clip(src_y, -16, height);
1952     if (src_y == height)
1953         dxy &= ~12;
1954     linesize = s->linesize << field_based;
1955     uvlinesize = s->uvlinesize << field_based;
1956     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1957     dest_y += dest_offset;
1958 //printf("%d %d %d\n", src_x, src_y, dxy);
1959     
1960     if(s->flags&CODEC_FLAG_EMU_EDGE){
1961         if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
1962                               || src_y + (motion_y&3) + h  > v_edge_pos){
1963             ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based, 
1964                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1965             ptr= s->edge_emu_buffer + src_offset;
1966             emu=1;
1967         }
1968     }
1969     if(!field_based)
1970         qpix_op[0][dxy](dest_y, ptr, linesize);
1971     else{
1972         //damn interlaced mode
1973         //FIXME boundary mirroring is not exactly correct here
1974         qpix_op[1][dxy](dest_y  , ptr  , linesize);
1975         qpix_op[1][dxy](dest_y+8, ptr+8, linesize);
1976     }
1977
1978     if(s->flags&CODEC_FLAG_GRAY) return;
1979
1980     if(field_based){
1981         mx= motion_x/2;
1982         my= motion_y>>1;
1983     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
1984         static const int rtab[8]= {0,0,1,1,0,0,0,1};
1985         mx= (motion_x>>1) + rtab[motion_x&7];
1986         my= (motion_y>>1) + rtab[motion_y&7];
1987     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
1988         mx= (motion_x>>1)|(motion_x&1);
1989         my= (motion_y>>1)|(motion_y&1);
1990     }else{
1991         mx= motion_x/2;
1992         my= motion_y/2;
1993     }
1994     mx= (mx>>1)|(mx&1);
1995     my= (my>>1)|(my&1);
1996
1997     dxy= (mx&1) | ((my&1)<<1);
1998     mx>>=1;
1999     my>>=1;
2000
2001     src_x = s->mb_x * 8 + mx;
2002     src_y = s->mb_y * (8 >> field_based) + my;
2003     src_x = clip(src_x, -8, s->width >> 1);
2004     if (src_x == (s->width >> 1))
2005         dxy &= ~1;
2006     src_y = clip(src_y, -8, height >> 1);
2007     if (src_y == (height >> 1))
2008         dxy &= ~2;
2009
2010     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
2011     ptr = ref_picture[1] + offset;
2012     if(emu){
2013         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
2014                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2015         ptr= s->edge_emu_buffer + (src_offset >> 1);
2016     }
2017     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
2018     
2019     ptr = ref_picture[2] + offset;
2020     if(emu){
2021         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
2022                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2023         ptr= s->edge_emu_buffer + (src_offset >> 1);
2024     }
2025     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
2026 }
2027
2028 inline int ff_h263_round_chroma(int x){
2029     if (x >= 0)
2030         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2031     else {
2032         x = -x;
2033         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2034     }
2035 }
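
/* ff_h263_round_chroma() converts the sum of the four 8x8 luma vectors (in
 * half-pel units, as accumulated in MPV_motion() below for MV_TYPE_8X8) into a
 * single half-pel chroma vector, i.e. roughly x/8 with a table-driven rounding
 * that favours half-pel positions. Worked example (assuming, as an exact
 * integer position needs no rounding, that h263_chroma_roundtab[0]==0):
 *     x=16  ->  0 + ((16>>3)&~1) = 2   (four luma MVs averaging 1 pel give a
 *                                       chroma MV of 1 pel = 2 half-pel units)
 *     x=-16 -> -2 by symmetry. */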
2036
2037 /**
2038  * motion compensation of a single macroblock
2039  * @param s context
2040  * @param dest_y luma destination pointer
2041  * @param dest_cb chroma cb/u destination pointer
2042  * @param dest_cr chroma cr/v destination pointer
2043  * @param dir direction (0->forward, 1->backward)
2044  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
2045  * @param pix_op halfpel motion compensation function (average or put normally)
2046  * @param qpix_op qpel motion compensation function (average or put normally)
2047  * the motion vectors are taken from s->mv and the MV type from s->mv_type
2048  */
2049 static inline void MPV_motion(MpegEncContext *s, 
2050                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2051                               int dir, uint8_t **ref_picture, 
2052                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
2053 {
2054     int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
2055     int mb_x, mb_y, i;
2056     uint8_t *ptr, *dest;
2057     int emu=0;
2058
2059     mb_x = s->mb_x;
2060     mb_y = s->mb_y;
2061
2062     switch(s->mv_type) {
2063     case MV_TYPE_16X16:
2064 #ifdef CONFIG_RISKY
2065         if(s->mcsel){
2066             if(s->real_sprite_warping_points==1){
2067                 gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
2068                             ref_picture, 0);
2069             }else{
2070                 gmc_motion(s, dest_y, dest_cb, dest_cr, 0,
2071                             ref_picture, 0);
2072             }
2073         }else if(s->quarter_sample){
2074             qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
2075                         ref_picture, 0,
2076                         0, pix_op, qpix_op,
2077                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2078         }else if(s->mspel){
2079             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
2080                         ref_picture, pix_op,
2081                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2082         }else
2083 #endif
2084         {
2085             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2086                         ref_picture, 0,
2087                         0, pix_op,
2088                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2089         }           
2090         break;
2091     case MV_TYPE_8X8:
2092         mx = 0;
2093         my = 0;
2094         if(s->quarter_sample){
2095             for(i=0;i<4;i++) {
2096                 motion_x = s->mv[dir][i][0];
2097                 motion_y = s->mv[dir][i][1];
2098
2099                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2100                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
2101                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
2102                     
2103                 /* WARNING: do not forget half pels */
2104                 src_x = clip(src_x, -16, s->width);
2105                 if (src_x == s->width)
2106                     dxy &= ~3;
2107                 src_y = clip(src_y, -16, s->height);
2108                 if (src_y == s->height)
2109                     dxy &= ~12;
2110                     
2111                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
2112                 if(s->flags&CODEC_FLAG_EMU_EDGE){
2113                     if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
2114                                           || src_y + (motion_y&3) + 8 > s->v_edge_pos){
2115                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2116                         ptr= s->edge_emu_buffer;
2117                     }
2118                 }
2119                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
2120                 qpix_op[1][dxy](dest, ptr, s->linesize);
2121
2122                 mx += s->mv[dir][i][0]/2;
2123                 my += s->mv[dir][i][1]/2;
2124             }
2125         }else{
2126             for(i=0;i<4;i++) {
2127                 motion_x = s->mv[dir][i][0];
2128                 motion_y = s->mv[dir][i][1];
2129
2130                 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2131                 src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
2132                 src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
2133                     
2134                 /* WARNING: do not forget half pels */
2135                 src_x = clip(src_x, -16, s->width);
2136                 if (src_x == s->width)
2137                     dxy &= ~1;
2138                 src_y = clip(src_y, -16, s->height);
2139                 if (src_y == s->height)
2140                     dxy &= ~2;
2141                     
2142                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
2143                 if(s->flags&CODEC_FLAG_EMU_EDGE){
2144                     if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
2145                                           || src_y + (motion_y&1) + 8 > s->v_edge_pos){
2146                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2147                         ptr= s->edge_emu_buffer;
2148                     }
2149                 }
2150                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
2151                 pix_op[1][dxy](dest, ptr, s->linesize, 8);
2152
2153                 mx += s->mv[dir][i][0];
2154                 my += s->mv[dir][i][1];
2155             }
2156         }
2157
2158         if(s->flags&CODEC_FLAG_GRAY) break;
2159         /* In case of 8X8, we construct a single chroma motion vector
2160            with a special rounding */
2161         mx= ff_h263_round_chroma(mx);
2162         my= ff_h263_round_chroma(my);
2163         dxy = ((my & 1) << 1) | (mx & 1);
2164         mx >>= 1;
2165         my >>= 1;
2166
2167         src_x = mb_x * 8 + mx;
2168         src_y = mb_y * 8 + my;
2169         src_x = clip(src_x, -8, s->width/2);
2170         if (src_x == s->width/2)
2171             dxy &= ~1;
2172         src_y = clip(src_y, -8, s->height/2);
2173         if (src_y == s->height/2)
2174             dxy &= ~2;
2175         
2176         offset = (src_y * (s->uvlinesize)) + src_x;
2177         ptr = ref_picture[1] + offset;
2178         if(s->flags&CODEC_FLAG_EMU_EDGE){
2179                 if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
2180                                       || src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
2181                     ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2182                     ptr= s->edge_emu_buffer;
2183                     emu=1;
2184                 }
2185             }
2186         pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);
2187
2188         ptr = ref_picture[2] + offset;
2189         if(emu){
2190             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2191             ptr= s->edge_emu_buffer;
2192         }
2193         pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
2194         break;
2195     case MV_TYPE_FIELD:
2196         if (s->picture_structure == PICT_FRAME) {
2197             if(s->quarter_sample){
2198                 /* top field */
2199                 qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
2200                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2201                             1, pix_op, qpix_op,
2202                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2203                 /* bottom field */
2204                 qpel_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2205                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2206                             1, pix_op, qpix_op,
2207                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2208             }else{
2209                 /* top field */       
2210                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2211                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2212                             1, pix_op,
2213                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2214                 /* bottom field */
2215                 mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2216                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2217                             1, pix_op,
2218                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2219             }
2220         } else {
2221             int offset;
2222             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
2223                 offset= s->field_select[dir][0] ? s->linesize : 0;
2224             }else{
2225                 ref_picture= s->current_picture.data;
2226                 offset= s->field_select[dir][0] ? s->linesize : -s->linesize; 
2227             } 
2228
2229             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2230                         ref_picture, offset,
2231                         0, pix_op,
2232                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2233         }
2234         break;
2235     case MV_TYPE_16X8:{
2236         int offset;
2237          uint8_t ** ref2picture;
2238
2239             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
2240                 ref2picture= ref_picture;
2241                 offset= s->field_select[dir][0] ? s->linesize : 0;
2242             }else{
2243                 ref2picture= s->current_picture.data;
2244                 offset= s->field_select[dir][0] ? s->linesize : -s->linesize; 
2245             } 
2246
2247             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2248                         ref2picture, offset,
2249                         0, pix_op,
2250                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2251
2252
2253             if(s->picture_structure == s->field_select[dir][1] + 1 || s->pict_type == B_TYPE || s->first_field){
2254                 ref2picture= ref_picture;
2255                 offset= s->field_select[dir][1] ? s->linesize : 0;
2256             }else{
2257                 ref2picture= s->current_picture.data;
2258                 offset= s->field_select[dir][1] ? s->linesize : -s->linesize; 
2259             } 
2260             // I know it is ugly, but this is the only way to fool emu_edge without rewriting mpeg_motion
2261             mpeg_motion(s, dest_y+16*s->linesize, dest_cb+8*s->uvlinesize, dest_cr+8*s->uvlinesize,
2262                         0,
2263                         ref2picture, offset,
2264                         0, pix_op,
2265                         s->mv[dir][1][0], s->mv[dir][1][1]+16, 8);
2266         }
2267         
2268         break;
2269     case MV_TYPE_DMV:
2270     {
2271     op_pixels_func (*dmv_pix_op)[4];
2272     int offset;
2273
2274         dmv_pix_op = s->dsp.put_pixels_tab;
2275
2276         if(s->picture_structure == PICT_FRAME){
2277             //put top field from top field
2278             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2279                         ref_picture, 0,
2280                         1, dmv_pix_op,
2281                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2282             //put bottom field from bottom field
2283             mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2284                         ref_picture, s->linesize,
2285                         1, dmv_pix_op,
2286                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2287
2288             dmv_pix_op = s->dsp.avg_pixels_tab; 
2289         
2290             //avg top field from bottom field
2291             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2292                         ref_picture, s->linesize,
2293                         1, dmv_pix_op,
2294                         s->mv[dir][2][0], s->mv[dir][2][1], 8);
2295             //avg bottom field from top field
2296             mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2297                         ref_picture, 0,
2298                         1, dmv_pix_op,
2299                         s->mv[dir][3][0], s->mv[dir][3][1], 8);
2300
2301         }else{
2302             offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2303                          s->linesize : 0;
2304
2305             //put field from the same parity
2306             //same parity is never in the same frame
2307             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2308                         ref_picture,offset,
2309                         0,dmv_pix_op,
2310                         s->mv[dir][0][0],s->mv[dir][0][1],16);
2311
2312             // after put we make avg of the same block
2313             dmv_pix_op=s->dsp.avg_pixels_tab; 
2314
2315             //opposite parity is always in the same frame if this is second field
2316             if(!s->first_field){
2317                 ref_picture = s->current_picture.data;    
2318                 //top field is one linesize from the frame beginning
2319                 offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2320                         -s->linesize : s->linesize;
2321             }else 
2322                 offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2323                         0 : s->linesize;
2324
2325             //avg field from the opposite parity
2326             mpeg_motion(s, dest_y, dest_cb, dest_cr,0,
2327                         ref_picture, offset,
2328                         0,dmv_pix_op,
2329                         s->mv[dir][2][0],s->mv[dir][2][1],16);
2330         }
2331     }
2332     break;
2333
2334     }
2335 }
2336
2337
2338 /* put block[] to dest[] */
2339 static inline void put_dct(MpegEncContext *s, 
2340                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2341 {
2342     s->dct_unquantize(s, block, i, s->qscale);
2343     s->dsp.idct_put (dest, line_size, block);
2344 }
2345
2346 /* add block[] to dest[] */
2347 static inline void add_dct(MpegEncContext *s, 
2348                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2349 {
2350     if (s->block_last_index[i] >= 0) {
2351         s->dsp.idct_add (dest, line_size, block);
2352     }
2353 }
2354
2355 static inline void add_dequant_dct(MpegEncContext *s, 
2356                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2357 {
2358     if (s->block_last_index[i] >= 0) {
2359         s->dct_unquantize(s, block, i, s->qscale);
2360
2361         s->dsp.idct_add (dest, line_size, block);
2362     }
2363 }
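
/* Summary of the three helpers above (as used by MPV_decode_mb() below):
 * put_dct() dequantizes and overwrites the destination (intra blocks), while
 * add_dct()/add_dequant_dct() add the residue on top of the already motion
 * compensated prediction (inter blocks). The *_dequant variants additionally
 * run dct_unquantize() for the codecs that keep the coefficients in quantized
 * form until this point; for the others the blocks arrive here already
 * dequantized and the plain idct_add()/idct_put() path is taken. */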
2364
2365 /**
2366  * cleans dc, ac and coded_block for the current non-intra MB
2367  */
2368 void ff_clean_intra_table_entries(MpegEncContext *s)
2369 {
2370     int wrap = s->block_wrap[0];
2371     int xy = s->block_index[0];
2372     
2373     s->dc_val[0][xy           ] = 
2374     s->dc_val[0][xy + 1       ] = 
2375     s->dc_val[0][xy     + wrap] =
2376     s->dc_val[0][xy + 1 + wrap] = 1024;
2377     /* ac pred */
2378     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
2379     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
2380     if (s->msmpeg4_version>=3) {
2381         s->coded_block[xy           ] =
2382         s->coded_block[xy + 1       ] =
2383         s->coded_block[xy     + wrap] =
2384         s->coded_block[xy + 1 + wrap] = 0;
2385     }
2386     /* chroma */
2387     wrap = s->block_wrap[4];
2388     xy = s->mb_x + 1 + (s->mb_y + 1) * wrap;
2389     s->dc_val[1][xy] =
2390     s->dc_val[2][xy] = 1024;
2391     /* ac pred */
2392     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
2393     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
2394     
2395     s->mbintra_table[s->mb_x + s->mb_y*s->mb_stride]= 0;
2396 }
2397
2398 /* generic function called after a macroblock has been parsed by the
2399    decoder or after it has been encoded by the encoder.
2400
2401    Important variables used:
2402    s->mb_intra : true if intra macroblock
2403    s->mv_dir   : motion vector direction
2404    s->mv_type  : motion vector type
2405    s->mv       : motion vector
2406    s->interlaced_dct : true if interlaced dct used (mpeg2)
2407  */
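
/* Minimal decoder-side sketch of that calling convention (illustrative only;
 * real decoders set many more fields, and 'mx'/'my' stand for a motion vector
 * parsed from the bitstream): */
#if 0
    s->mb_intra= 0;
    s->mv_dir  = MV_DIR_FORWARD;
    s->mv_type = MV_TYPE_16X16;
    s->mv[0][0][0]= mx;
    s->mv[0][0][1]= my;
    MPV_decode_mb(s, s->block);
#endif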
2408 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
2409 {
2410     int mb_x, mb_y;
2411     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
2412 #ifdef HAVE_XVMC
2413     if(s->avctx->xvmc_acceleration){
2414         XVMC_decode_mb(s,block);
2415         return;
2416     }
2417 #endif
2418
2419     mb_x = s->mb_x;
2420     mb_y = s->mb_y;
2421
2422     s->current_picture.qscale_table[mb_xy]= s->qscale;
2423
2424     /* update DC predictors for P macroblocks */
2425     if (!s->mb_intra) {
2426         if (s->h263_pred || s->h263_aic) {
2427             if(s->mbintra_table[mb_xy])
2428                 ff_clean_intra_table_entries(s);
2429         } else {
2430             s->last_dc[0] =
2431             s->last_dc[1] =
2432             s->last_dc[2] = 128 << s->intra_dc_precision;
2433         }
2434     }
2435     else if (s->h263_pred || s->h263_aic)
2436         s->mbintra_table[mb_xy]=1;
2437
2438     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
2439         uint8_t *dest_y, *dest_cb, *dest_cr;
2440         int dct_linesize, dct_offset;
2441         op_pixels_func (*op_pix)[4];
2442         qpel_mc_func (*op_qpix)[16];
2443         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2444         const int uvlinesize= s->current_picture.linesize[1];
2445
2446         /* avoid copy if macroblock skipped in last frame too */
2447         /* skip only during decoding, as during encoding we might trash the buffers a bit */
2448         if(!s->encoding){
2449             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
2450             const int age= s->current_picture.age;
2451
2452             assert(age);
2453
2454             if (s->mb_skiped) {
2455                 s->mb_skiped= 0;
2456                 assert(s->pict_type!=I_TYPE);
2457  
2458                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
2459                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2460
2461                 /* if previous was skipped too, then nothing to do !  */
2462                 if (*mbskip_ptr >= age && s->current_picture.reference){
2463                     return;
2464                 }
2465             } else if(!s->current_picture.reference){
2466                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
2467                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2468             } else{
2469                 *mbskip_ptr = 0; /* not skipped */
2470             }
2471         }
2472
2473         if (s->interlaced_dct) {
2474             dct_linesize = linesize * 2;
2475             dct_offset = linesize;
2476         } else {
2477             dct_linesize = linesize;
2478             dct_offset = linesize * 8;
2479         }
2480         
2481         dest_y=  s->dest[0];
2482         dest_cb= s->dest[1];
2483         dest_cr= s->dest[2];
2484
2485         if (!s->mb_intra) {
2486             /* motion handling */
2487             /* decoding or more than one mb_type (MC was already done otherwise) */
2488             if(!s->encoding){
2489                 if ((!s->no_rounding) || s->pict_type==B_TYPE){                
2490                     op_pix = s->dsp.put_pixels_tab;
2491                     op_qpix= s->dsp.put_qpel_pixels_tab;
2492                 }else{
2493                     op_pix = s->dsp.put_no_rnd_pixels_tab;
2494                     op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2495                 }
2496
2497                 if (s->mv_dir & MV_DIR_FORWARD) {
2498                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2499                     op_pix = s->dsp.avg_pixels_tab;
2500                     op_qpix= s->dsp.avg_qpel_pixels_tab;
2501                 }
2502                 if (s->mv_dir & MV_DIR_BACKWARD) {
2503                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2504                 }
2505             }
2506
2507             /* skip dequant / idct if we are really late ;) */
2508             if(s->hurry_up>1) return;
2509
2510             /* add dct residue */
2511             if(s->encoding || !(   s->mpeg2 || s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO 
2512                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
2513                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize);
2514                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2515                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2516                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2517
2518                 if(!(s->flags&CODEC_FLAG_GRAY)){
2519                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize);
2520                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize);
2521                 }
2522             } else if(s->codec_id != CODEC_ID_WMV2){
2523                 add_dct(s, block[0], 0, dest_y, dct_linesize);
2524                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2525                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2526                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2527
2528                 if(!(s->flags&CODEC_FLAG_GRAY)){
2529                     add_dct(s, block[4], 4, dest_cb, uvlinesize);
2530                     add_dct(s, block[5], 5, dest_cr, uvlinesize);
2531                 }
2532             } 
2533 #ifdef CONFIG_RISKY
2534             else{
2535                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
2536             }
2537 #endif
2538         } else {
2539             /* dct only in intra block */
2540             if(s->encoding || !(s->mpeg2 || s->codec_id==CODEC_ID_MPEG1VIDEO)){
2541                 put_dct(s, block[0], 0, dest_y, dct_linesize);
2542                 put_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2543                 put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2544                 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2545
2546                 if(!(s->flags&CODEC_FLAG_GRAY)){
2547                     put_dct(s, block[4], 4, dest_cb, uvlinesize);
2548                     put_dct(s, block[5], 5, dest_cr, uvlinesize);
2549                 }
2550             }else{
2551                 s->dsp.idct_put(dest_y                 , dct_linesize, block[0]);
2552                 s->dsp.idct_put(dest_y              + 8, dct_linesize, block[1]);
2553                 s->dsp.idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
2554                 s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
2555
2556                 if(!(s->flags&CODEC_FLAG_GRAY)){
2557                     s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
2558                     s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
2559                 }
2560             }
2561         }
2562     }
2563 }
2564
2565 #ifdef CONFIG_ENCODERS
2566
2567 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
2568 {
2569     static const char tab[64]=
2570         {3,2,2,1,1,1,1,1,
2571          1,1,1,1,1,1,1,1,
2572          1,1,1,1,1,1,1,1,
2573          0,0,0,0,0,0,0,0,
2574          0,0,0,0,0,0,0,0,
2575          0,0,0,0,0,0,0,0,
2576          0,0,0,0,0,0,0,0,
2577          0,0,0,0,0,0,0,0};
2578     int score=0;
2579     int run=0;
2580     int i;
2581     DCTELEM *block= s->block[n];
2582     const int last_index= s->block_last_index[n];
2583     int skip_dc;
2584
2585     if(threshold<0){
2586         skip_dc=0;
2587         threshold= -threshold;
2588     }else
2589         skip_dc=1;
2590
2591     /* are all the coefficients which we could set to zero already zero? */
2592     if(last_index<=skip_dc - 1) return;
2593
2594     for(i=0; i<=last_index; i++){
2595         const int j = s->intra_scantable.permutated[i];
2596         const int level = ABS(block[j]);
2597         if(level==1){
2598             if(skip_dc && i==0) continue;
2599             score+= tab[run];
2600             run=0;
2601         }else if(level>1){
2602             return;
2603         }else{
2604             run++;
2605         }
2606     }
2607     if(score >= threshold) return;
2608     for(i=skip_dc; i<=last_index; i++){
2609         const int j = s->intra_scantable.permutated[i];
2610         block[j]=0;
2611     }
2612     if(block[0]) s->block_last_index[n]= 0;
2613     else         s->block_last_index[n]= -1;
2614 }
2615
2616 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
2617 {
2618     int i;
2619     const int maxlevel= s->max_qcoeff;
2620     const int minlevel= s->min_qcoeff;
2621     
2622     if(s->mb_intra){
2623         i=1; //skip clipping of intra dc
2624     }else
2625         i=0;
2626     
2627     for(;i<=last_index; i++){
2628         const int j= s->intra_scantable.permutated[i];
2629         int level = block[j];
2630        
2631         if     (level>maxlevel) level=maxlevel;
2632         else if(level<minlevel) level=minlevel;
2633
2634         block[j]= level;
2635     }
2636 }
2637
2638 #if 0
2639 static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
2640     int score=0;
2641     int x,y;
2642     
2643     for(y=0; y<7; y++){
2644         for(x=0; x<16; x+=4){
2645             score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride]) 
2646                    +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
2647         }
2648         s+= stride;
2649     }
2650     
2651     return score;
2652 }
2653
2654 static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
2655     int score=0;
2656     int x,y;
2657     
2658     for(y=0; y<7; y++){
2659         for(x=0; x<16; x++){
2660             score+= ABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2661         }
2662         s1+= stride;
2663         s2+= stride;
2664     }
2665     
2666     return score;
2667 }
2668 #else
2669 #define SQ(a) ((a)*(a))
2670
2671 static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
2672     int score=0;
2673     int x,y;
2674     
2675     for(y=0; y<7; y++){
2676         for(x=0; x<16; x+=4){
2677             score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) 
2678                    +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
2679         }
2680         s+= stride;
2681     }
2682     
2683     return score;
2684 }
2685
2686 static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
2687     int score=0;
2688     int x,y;
2689     
2690     for(y=0; y<7; y++){
2691         for(x=0; x<16; x++){
2692             score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2693         }
2694         s1+= stride;
2695         s2+= stride;
2696     }
2697     
2698     return score;
2699 }
2700
2701 #endif
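
/* Both helpers measure vertical activity: pix_vcmp16x8() on the source block,
 * pix_diff_vcmp16x8() on the prediction error. encode_mb() below evaluates
 * them once with the normal stride (progressive_score) and once with twice the
 * stride (interlaced_score) and enables interlaced DCT for the macroblock when
 * the latter is clearly smaller. */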
2702
2703 #endif //CONFIG_ENCODERS
2704
2705 /**
2706  *
2707  * @param h is the normal height, this will be reduced automatically if needed for the last row
2708  */
2709 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2710     if (s->avctx->draw_horiz_band) {
2711         AVFrame *src;
2712         uint8_t *src_ptr[3];
2713         int offset[4];
2714         
2715         if(s->picture_structure != PICT_FRAME){
2716             h <<= 1;
2717             y <<= 1;
2718             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
2719         }
2720
2721         h= FFMIN(h, s->height - y);
2722
2723         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) 
2724             src= (AVFrame*)s->current_picture_ptr;
2725         else if(s->last_picture_ptr)
2726             src= (AVFrame*)s->last_picture_ptr;
2727         else
2728             return;
2729             
2730         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2731             offset[0]=
2732             offset[1]=
2733             offset[2]=
2734             offset[3]= 0;
2735         }else{
2736             offset[0]= y * s->linesize;
2737             offset[1]= 
2738             offset[2]= (y>>1) * s->uvlinesize;
2739             offset[3]= 0;
2740         }
2741
2742         emms_c();
2743
2744         s->avctx->draw_horiz_band(s->avctx, src, offset,
2745                                   y, s->picture_structure, h);
2746     }
2747 }
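
/* Sketch of an application supplied draw_horiz_band callback matching the call
 * above (the name 'my_draw_horiz_band' is a placeholder; see avcodec.h for the
 * exact prototype to use): */
#if 0
static void my_draw_horiz_band(AVCodecContext *avctx, AVFrame *src,
                               int offset[4], int y, int type, int height){
    /* src->data[i] + offset[i] points to the first line of the band in plane i;
       'height' picture lines starting at line 'y' are ready for display. */
}
#endif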
2748
2749 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
2750     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2751     const int uvlinesize= s->current_picture.linesize[1];
2752         
2753     s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
2754     s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1)     + s->mb_x*2;
2755     s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1 + s->mb_x*2;
2756     s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2)     + s->mb_x*2;
2757     s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
2758     s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
2759     
2760     if(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME){
2761         s->dest[0] = s->current_picture.data[0] + s->mb_x * 16 - 16;
2762         s->dest[1] = s->current_picture.data[1] + s->mb_x * 8 - 8;
2763         s->dest[2] = s->current_picture.data[2] + s->mb_x * 8 - 8;
2764     }else{
2765         s->dest[0] = s->current_picture.data[0] + (s->mb_y * 16* linesize  ) + s->mb_x * 16 - 16;
2766         s->dest[1] = s->current_picture.data[1] + (s->mb_y * 8 * uvlinesize) + s->mb_x * 8 - 8;
2767         s->dest[2] = s->current_picture.data[2] + (s->mb_y * 8 * uvlinesize) + s->mb_x * 8 - 8;
2768     }    
2769 }
2770
2771 #ifdef CONFIG_ENCODERS
2772
2773 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2774 {
2775     const int mb_x= s->mb_x;
2776     const int mb_y= s->mb_y;
2777     int i;
2778     int skip_dct[6];
2779     int dct_offset   = s->linesize*8; //default for progressive frames
2780     
2781     for(i=0; i<6; i++) skip_dct[i]=0;
2782     
2783     if(s->adaptive_quant){
2784         s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_stride] - s->qscale;
2785
2786         if(s->out_format==FMT_H263){
2787             if     (s->dquant> 2) s->dquant= 2;
2788             else if(s->dquant<-2) s->dquant=-2;
2789         }
2790             
2791         if(s->codec_id==CODEC_ID_MPEG4){        
2792             if(!s->mb_intra){
2793                 if(s->mv_dir&MV_DIRECT)
2794                     s->dquant=0;
2795
2796                 assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
2797             }
2798         }
2799         s->qscale+= s->dquant;
2800         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
2801         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
2802     }
2803
2804     if (s->mb_intra) {
2805         uint8_t *ptr;
2806         int wrap_y;
2807         int emu=0;
2808
2809         wrap_y = s->linesize;
2810         ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2811
2812         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2813             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2814             ptr= s->edge_emu_buffer;
2815             emu=1;
2816         }
2817         
2818         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2819             int progressive_score, interlaced_score;
2820             
2821             progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
2822             interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
2823             
2824             if(progressive_score > interlaced_score + 100){
2825                 s->interlaced_dct=1;
2826             
2827                 dct_offset= wrap_y;
2828                 wrap_y<<=1;
2829             }else
2830                 s->interlaced_dct=0;
2831         }
2832         
2833         s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
2834         s->dsp.get_pixels(s->block[1], ptr              + 8, wrap_y);
2835         s->dsp.get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
2836         s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
2837
2838         if(s->flags&CODEC_FLAG_GRAY){
2839             skip_dct[4]= 1;
2840             skip_dct[5]= 1;
2841         }else{
2842             int wrap_c = s->uvlinesize;
2843             ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2844             if(emu){
2845                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2846                 ptr= s->edge_emu_buffer;
2847             }
2848             s->dsp.get_pixels(s->block[4], ptr, wrap_c);
2849
2850             ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2851             if(emu){
2852                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2853                 ptr= s->edge_emu_buffer;
2854             }
2855             s->dsp.get_pixels(s->block[5], ptr, wrap_c);
2856         }
2857     }else{
2858         op_pixels_func (*op_pix)[4];
2859         qpel_mc_func (*op_qpix)[16];
2860         uint8_t *dest_y, *dest_cb, *dest_cr;
2861         uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2862         int wrap_y, wrap_c;
2863         int emu=0;
2864
2865         dest_y  = s->dest[0];
2866         dest_cb = s->dest[1];
2867         dest_cr = s->dest[2];
2868         wrap_y = s->linesize;
2869         wrap_c = s->uvlinesize;
2870         ptr_y  = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2871         ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2872         ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2873
2874         if ((!s->no_rounding) || s->pict_type==B_TYPE){
2875             op_pix = s->dsp.put_pixels_tab;
2876             op_qpix= s->dsp.put_qpel_pixels_tab;
2877         }else{
2878             op_pix = s->dsp.put_no_rnd_pixels_tab;
2879             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2880         }
2881
2882         if (s->mv_dir & MV_DIR_FORWARD) {
2883             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2884             op_pix = s->dsp.avg_pixels_tab;
2885             op_qpix= s->dsp.avg_qpel_pixels_tab;
2886         }
2887         if (s->mv_dir & MV_DIR_BACKWARD) {
2888             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2889         }
2890
2891         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2892             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2893             ptr_y= s->edge_emu_buffer;
2894             emu=1;
2895         }
2896         
2897         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2898             int progressive_score, interlaced_score;
2899             
2900             progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  ) 
2901                              + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
2902             interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
2903                              + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
2904             
2905             if(progressive_score > interlaced_score + 600){
2906                 s->interlaced_dct=1;
2907             
2908                 dct_offset= wrap_y;
2909                 wrap_y<<=1;
2910             }else
2911                 s->interlaced_dct=0;
2912         }
2913         
2914         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
2915         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
2916         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
2917         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
2918         
2919         if(s->flags&CODEC_FLAG_GRAY){
2920             skip_dct[4]= 1;
2921             skip_dct[5]= 1;
2922         }else{
2923             if(emu){
2924                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2925                 ptr_cb= s->edge_emu_buffer;
2926             }
2927             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2928             if(emu){
2929                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2930                 ptr_cr= s->edge_emu_buffer;
2931             }
2932             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2933         }
2934         /* pre-quantization: if the MB was predicted well (low mc variance), skip the DCT for blocks whose SAD against the prediction is below a qscale dependent threshold */
2935         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
2936             //FIXME optimize
2937             if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
2938             if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
2939             if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
2940             if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
2941             if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_c) < 20*s->qscale) skip_dct[4]= 1;
2942             if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_c) < 20*s->qscale) skip_dct[5]= 1;
2943 #if 0
2944 {
2945  static int stat[7];
2946  int num=0;
2947  for(i=0; i<6; i++)
2948   if(skip_dct[i]) num++;
2949  stat[num]++;
2950  
2951  if(s->mb_x==0 && s->mb_y==0){
2952   for(i=0; i<7; i++){
2953    printf("%6d %1d\n", stat[i], i);
2954   }
2955  }
2956 }
2957 #endif
2958         }
2959
2960     }
2961             
2962 #if 0
2963             {
2964                 float adap_parm;
2965                 
2966                 adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_stride*mb_y+mb_x] + 1.0) /
2967                             ((s->mb_var[s->mb_stride*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
2968             
2969                 printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", 
2970                         (s->mb_type[s->mb_stride*mb_y+mb_x] > 0) ? 'I' : 'P', 
2971                         s->qscale, adap_parm, s->qscale*adap_parm,
2972                         s->mb_var[s->mb_stride*mb_y+mb_x], s->avg_mb_var);
2973             }
2974 #endif
2975     /* DCT & quantize */
2976     if(s->out_format==FMT_MJPEG){
2977         for(i=0;i<6;i++) {
2978             int overflow;
2979             s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, 8, &overflow);
2980             if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
2981         }
2982     }else{
2983         for(i=0;i<6;i++) {
2984             if(!skip_dct[i]){
2985                 int overflow;
2986                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2987             // FIXME we could decide to change the quantizer instead of clipping
2988             // JS: I don't think that would be a good idea, it could lower quality instead
2989             //     of improving it. Just INTRADC clipping deserves changes in the quantizer
2990                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
2991             }else
2992                 s->block_last_index[i]= -1;
2993         }
2994         if(s->luma_elim_threshold && !s->mb_intra)
2995             for(i=0; i<4; i++)
2996                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2997         if(s->chroma_elim_threshold && !s->mb_intra)
2998             for(i=4; i<6; i++)
2999                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
3000     }
3001
3002     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
3003         s->block_last_index[4]=
3004         s->block_last_index[5]= 0;
3005         s->block[4][0]=
3006         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
3007     }
3008
3009     /* huffman encode */
3010     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
3011     case CODEC_ID_MPEG1VIDEO:
3012         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
3013 #ifdef CONFIG_RISKY
3014     case CODEC_ID_MPEG4:
3015         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3016     case CODEC_ID_MSMPEG4V2:
3017     case CODEC_ID_MSMPEG4V3:
3018     case CODEC_ID_WMV1:
3019         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3020     case CODEC_ID_WMV2:
3021          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
3022     case CODEC_ID_H263:
3023     case CODEC_ID_H263P:
3024     case CODEC_ID_FLV1:
3025     case CODEC_ID_RV10:
3026         h263_encode_mb(s, s->block, motion_x, motion_y); break;
3027 #endif
3028     case CODEC_ID_MJPEG:
3029         mjpeg_encode_mb(s, s->block); break;
3030     default:
3031         assert(0);
3032     }
3033 }
3034
3035 #endif //CONFIG_ENCODERS
3036
3037 /**
3038  * combines the (truncated) bitstream into a complete frame
3039  * @returns -1 if no complete frame could be created
3040  */
3041 int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size){
3042     ParseContext *pc= &s->parse_context;
3043
3044 #if 0
3045     if(pc->overread){
3046         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
3047         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
3048     }
3049 #endif
3050
3051     /* copy overread bytes from the last frame into the buffer */
3052     for(; pc->overread>0; pc->overread--){
3053         pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
3054     }
3055     
3056     pc->last_index= pc->index;
3057
3058     /* copy into the buffer and return */
3059     if(next == END_NOT_FOUND){
3060         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
3061
3062         memcpy(&pc->buffer[pc->index], *buf, *buf_size);
3063         pc->index += *buf_size;
3064         return -1;
3065     }
3066
3067     *buf_size=
3068     pc->overread_index= pc->index + next;
3069     
3070     /* append to buffer */
3071     if(pc->index){
3072         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
3073
3074         memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
3075         pc->index = 0;
3076         *buf= pc->buffer;
3077     }
3078
3079     /* store overread bytes */
3080     for(;next < 0; next++){
3081         pc->state = (pc->state<<8) | pc->buffer[pc->last_index + next];
3082         pc->overread++;
3083     }
3084
3085 #if 0
3086     if(pc->overread){
3087         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
3088         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
3089     }
3090 #endif
3091
3092     return 0;
3093 }
3094
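/**
 * Releases all internally or user allocated picture buffers and resets the
 * parse context; called when the codec is flushed (for example when seeking).
 */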
3095 void ff_mpeg_flush(AVCodecContext *avctx){
3096     int i;
3097     MpegEncContext *s = avctx->priv_data;
3098     
3099     for(i=0; i<MAX_PICTURE_COUNT; i++){
3100        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
3101                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
3102         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
3103     }
3104     s->last_picture_ptr = s->next_picture_ptr = NULL;
3105     
3106     s->parse_context.state= -1;
3107     s->parse_context.frame_start_found= 0;
3108     s->parse_context.overread= 0;
3109     s->parse_context.overread_index= 0;
3110     s->parse_context.index= 0;
3111     s->parse_context.last_index= 0;
3112 }
3113
3114 #ifdef CONFIG_ENCODERS
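/**
 * Appends the first 'length' bits of src (a big-endian bitstream) to the
 * PutBitContext, in 16 bit chunks plus a final partial word of 1-15 bits.
 */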
3115 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
3116 {
3117     int bytes= length>>4;
3118     int bits= length&15;
3119     int i;
3120
3121     if(length==0) return;
3122
3123     for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i]));
3124     put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits));
3125 }
3126
3127 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
3128     int i;
3129
3130     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
3131
3132     /* mpeg1 */
3133     d->mb_skip_run= s->mb_skip_run;
3134     for(i=0; i<3; i++)
3135         d->last_dc[i]= s->last_dc[i];
3136     
3137     /* statistics */
3138     d->mv_bits= s->mv_bits;
3139     d->i_tex_bits= s->i_tex_bits;
3140     d->p_tex_bits= s->p_tex_bits;
3141     d->i_count= s->i_count;
3142     d->f_count= s->f_count;
3143     d->b_count= s->b_count;
3144     d->skip_count= s->skip_count;
3145     d->misc_bits= s->misc_bits;
3146     d->last_bits= 0;
3147
3148     d->mb_skiped= 0;
3149     d->qscale= s->qscale;
3150 }
3151
3152 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
3153     int i;
3154
3155     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
3156     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
3157     
3158     /* mpeg1 */
3159     d->mb_skip_run= s->mb_skip_run;
3160     for(i=0; i<3; i++)
3161         d->last_dc[i]= s->last_dc[i];
3162     
3163     /* statistics */
3164     d->mv_bits= s->mv_bits;
3165     d->i_tex_bits= s->i_tex_bits;
3166     d->p_tex_bits= s->p_tex_bits;
3167     d->i_count= s->i_count;
3168     d->f_count= s->f_count;
3169     d->b_count= s->b_count;
3170     d->skip_count= s->skip_count;
3171     d->misc_bits= s->misc_bits;
3172
3173     d->mb_intra= s->mb_intra;
3174     d->mb_skiped= s->mb_skiped;
3175     d->mv_type= s->mv_type;
3176     d->mv_dir= s->mv_dir;
3177     d->pb= s->pb;
3178     if(s->data_partitioning){
3179         d->pb2= s->pb2;
3180         d->tex_pb= s->tex_pb;
3181     }
3182     d->block= s->block;
3183     for(i=0; i<6; i++)
3184         d->block_last_index[i]= s->block_last_index[i];
3185     d->interlaced_dct= s->interlaced_dct;
3186     d->qscale= s->qscale;
3187 }
3188
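/**
 * Encodes the current macroblock with the given mode into one of two alternating
 * scratch bitstreams and scores it: by bit count alone, or, for
 * FF_MB_DECISION_RD, by bits weighted with qscale^2 plus the reconstruction SSE.
 * The candidate is kept if it beats the best score found so far.
 */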
3189 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
3190                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
3191                            int *dmin, int *next_block, int motion_x, int motion_y)
3192 {
3193     int score;
3194     uint8_t *dest_backup[3];
3195     
3196     copy_context_before_encode(s, backup, type);
3197
3198     s->block= s->blocks[*next_block];
3199     s->pb= pb[*next_block];
3200     if(s->data_partitioning){
3201         s->pb2   = pb2   [*next_block];
3202         s->tex_pb= tex_pb[*next_block];
3203     }
3204     
3205     if(*next_block){
3206         memcpy(dest_backup, s->dest, sizeof(s->dest));
3207         s->dest[0] = s->me.scratchpad;
3208         s->dest[1] = s->me.scratchpad + 16;
3209         s->dest[2] = s->me.scratchpad + 16 + 8;
3210         assert(2*s->uvlinesize == s->linesize); //should be no problem for encoding
3211         assert(s->linesize >= 64); //FIXME
3212     }
3213
3214     encode_mb(s, motion_x, motion_y);
3215     
3216     score= get_bit_count(&s->pb);
3217     if(s->data_partitioning){
3218         score+= get_bit_count(&s->pb2);
3219         score+= get_bit_count(&s->tex_pb);
3220     }
3221    
3222     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
3223         MPV_decode_mb(s, s->block);
3224
3225         score *= s->qscale * s->qscale * 109;
3226         score += sse_mb(s) << 7;
3227     }
3228     
3229     if(*next_block){
3230         memcpy(s->dest, dest_backup, sizeof(s->dest));
3231     }
3232
3233     if(score<*dmin){
3234         *dmin= score;
3235         *next_block^=1;
3236
3237         copy_context_after_encode(best, s, type);
3238     }
3239 }
3240                 
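/* Sum of squared errors between two blocks; uses the optimized dsputil sse
   functions for the common 16x16 and 8x8 cases and a scalar loop otherwise. */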
3241 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
3242     uint32_t *sq = squareTbl + 256;
3243     int acc=0;
3244     int x,y;
3245     
3246     if(w==16 && h==16) 
3247         return s->dsp.sse[0](NULL, src1, src2, stride);
3248     else if(w==8 && h==8)
3249         return s->dsp.sse[1](NULL, src1, src2, stride);
3250     
3251     for(y=0; y<h; y++){
3252         for(x=0; x<w; x++){
3253             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
3254         } 
3255     }
3256     
3257     assert(acc>=0);
3258     
3259     return acc;
3260 }
3261
3262 static int sse_mb(MpegEncContext *s){
3263     int w= 16;
3264     int h= 16;
3265
3266     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3267     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3268
3269     if(w==16 && h==16)
3270         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize)
3271                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize)
3272                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize);
3273     else
3274         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
3275                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
3276                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
3277 }
3278
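/**
 * Encodes one complete picture: runs motion estimation over all macroblocks,
 * picks the quantizer (rate control), handles scene change promotion to I-type,
 * writes the picture header and then encodes every macroblock, optionally trying
 * several MB modes and keeping the cheapest one.
 */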
3279 static void encode_picture(MpegEncContext *s, int picture_number)
3280 {
3281     int mb_x, mb_y, pdif = 0;
3282     int i;
3283     int bits;
3284     MpegEncContext best_s, backup_s;
3285     uint8_t bit_buf[2][3000];
3286     uint8_t bit_buf2[2][3000];
3287     uint8_t bit_buf_tex[2][3000];
3288     PutBitContext pb[2], pb2[2], tex_pb[2];
3289
3290     for(i=0; i<2; i++){
3291         init_put_bits(&pb    [i], bit_buf    [i], 3000, NULL, NULL);
3292         init_put_bits(&pb2   [i], bit_buf2   [i], 3000, NULL, NULL);
3293         init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000, NULL, NULL);
3294     }
3295
3296     s->picture_number = picture_number;
3297     
3298     /* Reset the average MB variance */
3299     s->current_picture.mb_var_sum = 0;
3300     s->current_picture.mc_mb_var_sum = 0;
3301
3302 #ifdef CONFIG_RISKY
3303     /* we need to initialize some time vars before we can encode b-frames */
3304     // RAL: Condition added for MPEG1VIDEO
3305     if (s->codec_id == CODEC_ID_MPEG1VIDEO || (s->h263_pred && !s->h263_msmpeg4))
3306         ff_set_mpeg4_time(s, s->picture_number); 
3307 #endif
3308         
3309     s->scene_change_score=0;
3310     
3311     s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME rate distortion
3312     
3313     if(s->pict_type==I_TYPE){
3314         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3315         else                        s->no_rounding=0;
3316     }else if(s->pict_type!=B_TYPE){
3317         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3318             s->no_rounding ^= 1;          
3319     }
3320     
3321     /* Estimate motion for every MB */
3322     s->mb_intra=0; //for the rate-distortion & bit compare functions
3323     if(s->pict_type != I_TYPE){
3324         if(s->pict_type != B_TYPE){
3325             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
3326                 s->me.pre_pass=1;
3327                 s->me.dia_size= s->avctx->pre_dia_size;
3328
3329                 for(mb_y=s->mb_height-1; mb_y >=0 ; mb_y--) {
3330                     for(mb_x=s->mb_width-1; mb_x >=0 ; mb_x--) {
3331                         s->mb_x = mb_x;
3332                         s->mb_y = mb_y;
3333                         ff_pre_estimate_p_frame_motion(s, mb_x, mb_y);
3334                     }
3335                 }
3336                 s->me.pre_pass=0;
3337             }
3338         }
3339
3340         s->me.dia_size= s->avctx->dia_size;
3341         for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3342             s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
3343             s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
3344             s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
3345             s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
3346             for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3347                 s->mb_x = mb_x;
3348                 s->mb_y = mb_y;
3349                 s->block_index[0]+=2;
3350                 s->block_index[1]+=2;
3351                 s->block_index[2]+=2;
3352                 s->block_index[3]+=2;
3353                 
3354                 /* compute motion vector & mb_type and store in context */
3355                 if(s->pict_type==B_TYPE)
3356                     ff_estimate_b_frame_motion(s, mb_x, mb_y);
3357                 else
3358                     ff_estimate_p_frame_motion(s, mb_x, mb_y);
3359             }
3360         }
3361     }else /* if(s->pict_type == I_TYPE) */{
3362         /* I-Frame */
3363         //FIXME do we need to zero them?
3364         memset(s->motion_val[0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
3365         memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2);
3366         memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3367         
3368         if(!s->fixed_qscale){
3369             /* finding spatial complexity for I-frame rate control */
3370             for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3371                 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3372                     int xx = mb_x * 16;
3373                     int yy = mb_y * 16;
3374                     uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
3375                     int varc;
3376                     int sum = s->dsp.pix_sum(pix, s->linesize);
3377     
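                    /* variance estimate for the 16x16 luma block: sum of squares
                       minus sum^2/256 is 256*variance, then rounded down by 8 bits;
                       the +500 appears to be a small heuristic bias */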
3378                     varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
3379
3380                     s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
3381                     s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
3382                     s->current_picture.mb_var_sum    += varc;
3383                 }
3384             }
3385         }
3386     }
3387     emms_c();
3388
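    /* a positive scene change score (set during motion estimation) means inter
       coding is expected to be inefficient, so the frame is promoted from P to I */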
3389     if(s->scene_change_score > 0 && s->pict_type == P_TYPE){
3390         s->pict_type= I_TYPE;
3391         memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3392 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3393     }
3394
3395     if(!s->umvplus){
3396         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
3397             s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
3398         
3399             ff_fix_long_p_mvs(s);
3400         }
3401
3402         if(s->pict_type==B_TYPE){
3403             int a, b;
3404
3405             a = ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
3406             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, MB_TYPE_BIDIR);
3407             s->f_code = FFMAX(a, b);
3408
3409             a = ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
3410             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, MB_TYPE_BIDIR);
3411             s->b_code = FFMAX(a, b);
3412
3413             ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
3414             ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
3415             ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
3416             ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
3417         }
3418     }
3419     
3420     if (s->fixed_qscale) 
3421         s->frame_qscale = s->current_picture.quality;
3422     else
3423         s->frame_qscale = ff_rate_estimate_qscale(s);
3424
3425     if(s->adaptive_quant){
3426 #ifdef CONFIG_RISKY
3427         switch(s->codec_id){
3428         case CODEC_ID_MPEG4:
3429             ff_clean_mpeg4_qscales(s);
3430             break;
3431         case CODEC_ID_H263:
3432         case CODEC_ID_H263P:
3433         case CODEC_ID_FLV1:
3434             ff_clean_h263_qscales(s);
3435             break;
3436         }
3437 #endif
3438
3439         s->qscale= s->current_picture.qscale_table[0];
3440     }else
3441         s->qscale= (int)(s->frame_qscale + 0.5);
3442         
3443     if (s->out_format == FMT_MJPEG) {
3444         /* for mjpeg, we do include qscale in the matrix */
3445         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
3446         for(i=1;i<64;i++){
3447             int j= s->dsp.idct_permutation[i];
3448
3449             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3450         }
3451         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, 
3452                        s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8);
3453     }
3454     
3455     //FIXME var duplication
3456     s->current_picture.key_frame= s->pict_type == I_TYPE;
3457     s->current_picture.pict_type= s->pict_type;
3458
3459     if(s->current_picture.key_frame)
3460         s->picture_in_gop_number=0;
3461
3462     s->last_bits= get_bit_count(&s->pb);
3463     switch(s->out_format) {
3464     case FMT_MJPEG:
3465         mjpeg_picture_header(s);
3466         break;
3467 #ifdef CONFIG_RISKY
3468     case FMT_H263:
3469         if (s->codec_id == CODEC_ID_WMV2) 
3470             ff_wmv2_encode_picture_header(s, picture_number);
3471         else if (s->h263_msmpeg4) 
3472             msmpeg4_encode_picture_header(s, picture_number);
3473         else if (s->h263_pred)
3474             mpeg4_encode_picture_header(s, picture_number);
3475         else if (s->h263_rv10) 
3476             rv10_encode_picture_header(s, picture_number);
3477         else if (s->codec_id == CODEC_ID_FLV1)
3478             ff_flv_encode_picture_header(s, picture_number);
3479         else
3480             h263_encode_picture_header(s, picture_number);
3481         break;
3482 #endif
3483     case FMT_MPEG1:
3484         mpeg1_encode_picture_header(s, picture_number);
3485         break;
3486     }
3487     bits= get_bit_count(&s->pb);
3488     s->header_bits= bits - s->last_bits;
3489     s->last_bits= bits;
3490     s->mv_bits=0;
3491     s->misc_bits=0;
3492     s->i_tex_bits=0;
3493     s->p_tex_bits=0;
3494     s->i_count=0;
3495     s->f_count=0;
3496     s->b_count=0;
3497     s->skip_count=0;
3498
3499     for(i=0; i<3; i++){
3500         /* init last dc values */
3501         /* note: quant matrix value (8) is implied here */
3502         s->last_dc[i] = 128;
3503         
3504         s->current_picture_ptr->error[i] = 0;
3505     }
3506     s->mb_skip_run = 0;
3507     s->last_mv[0][0][0] = 0;
3508     s->last_mv[0][0][1] = 0;
3509     s->last_mv[1][0][0] = 0;
3510     s->last_mv[1][0][1] = 0;
3511      
3512     s->last_mv_dir = 0;
3513
3514 #ifdef CONFIG_RISKY
3515     switch(s->codec_id){
3516     case CODEC_ID_H263:
3517     case CODEC_ID_H263P:
3518     case CODEC_ID_FLV1:
3519         s->gob_index = ff_h263_get_gob_height(s);
3520         break;
3521     case CODEC_ID_MPEG4:
3522         if(s->partitioned_frame)
3523             ff_mpeg4_init_partitions(s);
3524         break;
3525     }
3526 #endif
3527
3528     s->resync_mb_x=0;
3529     s->resync_mb_y=0;
3530     s->first_slice_line = 1;
3531     s->ptr_lastgob = s->pb.buf;
3532     for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3533         s->mb_x=0;
3534         s->mb_y= mb_y;
3535
3536         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
3537         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
3538         ff_init_block_index(s);
3539         
3540         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3541             const int xy= mb_y*s->mb_stride + mb_x;
3542             int mb_type= s->mb_type[xy];
3543 //            int d;
3544             int dmin= INT_MAX;
3545
3546             s->mb_x = mb_x;
3547             ff_update_block_index(s);
3548
3549             /* write gob / video packet header  */
3550 #ifdef CONFIG_RISKY
3551             if(s->rtp_mode){
3552                 int current_packet_size, is_gob_start;
3553                 
3554                 current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
3555                 is_gob_start=0;
3556                 
3557                 if(s->codec_id==CODEC_ID_MPEG4){
3558                     if(current_packet_size >= s->rtp_payload_size
3559                        && s->mb_y + s->mb_x>0){
3560
3561                         if(s->partitioned_frame){
3562                             ff_mpeg4_merge_partitions(s);
3563                             ff_mpeg4_init_partitions(s);
3564                         }
3565                         ff_mpeg4_encode_video_packet_header(s);
3566
3567                         if(s->flags&CODEC_FLAG_PASS1){
3568                             int bits= get_bit_count(&s->pb);
3569                             s->misc_bits+= bits - s->last_bits;
3570                             s->last_bits= bits;
3571                         }
3572                         ff_mpeg4_clean_buffers(s);
3573                         is_gob_start=1;
3574                     }
3575                 }else if(s->codec_id==CODEC_ID_MPEG1VIDEO){
3576                     if(   current_packet_size >= s->rtp_payload_size 
3577                        && s->mb_y + s->mb_x>0 && s->mb_skip_run==0){
3578                         ff_mpeg1_encode_slice_header(s);
3579                         ff_mpeg1_clean_buffers(s);
3580                         is_gob_start=1;
3581                     }
3582                 }else{
3583                     if(current_packet_size >= s->rtp_payload_size
3584                        && s->mb_x==0 && s->mb_y>0 && s->mb_y%s->gob_index==0){
3585                        
3586                         h263_encode_gob_header(s, mb_y);                       
3587                         is_gob_start=1;
3588                     }
3589                 }
3590
3591                 if(is_gob_start){
3592                     s->ptr_lastgob = pbBufPtr(&s->pb);
3593                     s->first_slice_line=1;
3594                     s->resync_mb_x=mb_x;
3595                     s->resync_mb_y=mb_y;
3596                 }
3597             }
3598 #endif
3599
3600             if(  (s->resync_mb_x   == s->mb_x)
3601                && s->resync_mb_y+1 == s->mb_y){
3602                 s->first_slice_line=0; 
3603             }
3604
3605             s->mb_skiped=0;
3606
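            /* mode decision: if several MB types are allowed, each candidate is
               encoded into a scratch bitstream via encode_mb_hq() and the cheapest
               one is kept; otherwise the type chosen during motion estimation is
               encoded directly */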
3607             if(mb_type & (mb_type-1)){ // more than 1 MB type possible
3608                 int next_block=0;
3609                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3610
3611                 copy_context_before_encode(&backup_s, s, -1);
3612                 backup_s.pb= s->pb;
3613                 best_s.data_partitioning= s->data_partitioning;
3614                 best_s.partitioned_frame= s->partitioned_frame;
3615                 if(s->data_partitioning){
3616                     backup_s.pb2= s->pb2;
3617                     backup_s.tex_pb= s->tex_pb;
3618                 }
3619
3620                 if(mb_type&MB_TYPE_INTER){
3621                     s->mv_dir = MV_DIR_FORWARD;
3622                     s->mv_type = MV_TYPE_16X16;
3623                     s->mb_intra= 0;
3624                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3625                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3626                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb, 
3627                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3628                 }
3629                 if(mb_type&MB_TYPE_INTER4V){                 
3630                     s->mv_dir = MV_DIR_FORWARD;
3631                     s->mv_type = MV_TYPE_8X8;
3632                     s->mb_intra= 0;
3633                     for(i=0; i<4; i++){
3634                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3635                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3636                     }
3637                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, 
3638                                  &dmin, &next_block, 0, 0);
3639                 }
3640                 if(mb_type&MB_TYPE_FORWARD){
3641                     s->mv_dir = MV_DIR_FORWARD;
3642                     s->mv_type = MV_TYPE_16X16;
3643                     s->mb_intra= 0;
3644                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3645                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3646                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, 
3647                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3648                 }
3649                 if(mb_type&MB_TYPE_BACKWARD){
3650                     s->mv_dir = MV_DIR_BACKWARD;
3651                     s->mv_type = MV_TYPE_16X16;
3652                     s->mb_intra= 0;
3653                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3654                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3655                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
3656                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3657                 }
3658                 if(mb_type&MB_TYPE_BIDIR){
3659                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3660                     s->mv_type = MV_TYPE_16X16;
3661                     s->mb_intra= 0;
3662                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3663                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3664                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3665                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3666                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, 
3667                                  &dmin, &next_block, 0, 0);
3668                 }
3669                 if(mb_type&MB_TYPE_DIRECT){
3670                     int mx= s->b_direct_mv_table[xy][0];
3671                     int my= s->b_direct_mv_table[xy][1];
3672                     
3673                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3674                     s->mb_intra= 0;
3675 #ifdef CONFIG_RISKY
3676                     ff_mpeg4_set_direct_mv(s, mx, my);
3677 #endif
3678                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, 
3679                                  &dmin, &next_block, mx, my);
3680                 }
3681                 if(mb_type&MB_TYPE_INTRA){
3682                     s->mv_dir = 0;
3683                     s->mv_type = MV_TYPE_16X16;
3684                     s->mb_intra= 1;
3685                     s->mv[0][0][0] = 0;
3686                     s->mv[0][0][1] = 0;
3687                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, 
3688                                  &dmin, &next_block, 0, 0);
3689                     if(s->h263_pred || s->h263_aic){
3690                         if(best_s.mb_intra)
3691                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3692                         else
3693                             ff_clean_intra_table_entries(s); //old mode?
3694                     }
3695                 }
3696                 copy_context_after_encode(s, &best_s, -1);
3697                 
3698                 pb_bits_count= get_bit_count(&s->pb);
3699                 flush_put_bits(&s->pb);
3700                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3701                 s->pb= backup_s.pb;
3702                 
3703                 if(s->data_partitioning){
3704                     pb2_bits_count= get_bit_count(&s->pb2);
3705                     flush_put_bits(&s->pb2);
3706                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3707                     s->pb2= backup_s.pb2;
3708                     
3709                     tex_pb_bits_count= get_bit_count(&s->tex_pb);
3710                     flush_put_bits(&s->tex_pb);
3711                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3712                     s->tex_pb= backup_s.tex_pb;
3713                 }
3714                 s->last_bits= get_bit_count(&s->pb);
3715                 
3716                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
3717                     ff_h263_update_motion_val(s);
3718         
3719                 if(next_block==0){
3720                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->me.scratchpad     , s->linesize  ,16);
3721                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->me.scratchpad + 16, s->uvlinesize, 8);
3722                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->me.scratchpad + 24, s->uvlinesize, 8);
3723                 }
3724
3725                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3726                     MPV_decode_mb(s, s->block);
3727             } else {
3728                 int motion_x, motion_y;
3729                 int intra_score;
3730                 int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_stride];
3731                 
3732               if(s->avctx->mb_decision==FF_MB_DECISION_SIMPLE && s->pict_type==P_TYPE){ //FIXME check if the mess is useful at all
3733                 /* get luma score */
3734                 if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
3735                     intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_stride]<<8) - 500; //FIXME don't scale it down so we don't have to fix it
3736                 }else{
3737                     uint8_t *dest_y;
3738
3739                     int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_stride]; //FIXME
3740                     mean*= 0x01010101;
3741                     
3742                     dest_y  = s->new_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
3743                 
3744                     for(i=0; i<16; i++){
3745                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean;
3746                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
3747                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
3748                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
3749                     }
3750
3751                     s->mb_intra=1;
3752                     intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize);
3753                                         
3754 /*                    printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8, 
3755                         s->current_picture.mb_var[mb_x + mb_y*s->mb_stride],
3756                         s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_stride]);*/
3757                 }
3758                 
3759                 /* get chroma score */
3760                 if(s->avctx->mb_cmp&FF_CMP_CHROMA){
3761                     int i;
3762                     
3763                     s->mb_intra=1;
3764                     for(i=1; i<3; i++){
3765                         uint8_t *dest_c;
3766                         int mean;
3767                         
3768                         if(s->out_format == FMT_H263){
3769                             mean= (s->dc_val[i][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;)
3770                         }else{
3771                             mean= (s->last_dc[i] + 4)>>3;
3772                         }
3773                         dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
3774                         
3775                         mean*= 0x01010101;
3776                         for(i=0; i<8; i++){
3777                             *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean;
3778                             *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean;
3779                         }
3780                         
3781                         intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize);
3782                     }                
3783                 }
3784
3785                 /* bias */
3786                 switch(s->avctx->mb_cmp&0xFF){
3787                 default:
3788                 case FF_CMP_SAD:
3789                     intra_score+= 32*s->qscale;
3790                     break;
3791                 case FF_CMP_SSE:
3792                     intra_score+= 24*s->qscale*s->qscale;
3793                     break;
3794                 case FF_CMP_SATD:
3795                     intra_score+= 96*s->qscale;
3796                     break;
3797                 case FF_CMP_DCT:
3798                     intra_score+= 48*s->qscale;
3799                     break;
3800                 case FF_CMP_BIT:
3801                     intra_score+= 16;
3802                     break;
3803                 case FF_CMP_PSNR:
3804                 case FF_CMP_RD:
3805                     intra_score+= (s->qscale*s->qscale*109*8 + 64)>>7;
3806                     break;
3807                 }
3808
3809                 if(intra_score < inter_score)
3810                     mb_type= MB_TYPE_INTRA;
3811               }  
3812                 
3813                 s->mv_type=MV_TYPE_16X16;
3814                 // only one MB-Type possible
3815                 
3816                 switch(mb_type){
3817                 case MB_TYPE_INTRA:
3818                     s->mv_dir = 0;
3819                     s->mb_intra= 1;
3820                     motion_x= s->mv[0][0][0] = 0;
3821                     motion_y= s->mv[0][0][1] = 0;
3822                     break;
3823                 case MB_TYPE_INTER:
3824                     s->mv_dir = MV_DIR_FORWARD;
3825                     s->mb_intra= 0;
3826                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3827                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3828                     break;
3829                 case MB_TYPE_INTER4V:
3830                     s->mv_dir = MV_DIR_FORWARD;
3831                     s->mv_type = MV_TYPE_8X8;
3832                     s->mb_intra= 0;
3833                     for(i=0; i<4; i++){
3834                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3835                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3836                     }
3837                     motion_x= motion_y= 0;
3838                     break;
3839                 case MB_TYPE_DIRECT:
3840                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3841                     s->mb_intra= 0;
3842                     motion_x=s->b_direct_mv_table[xy][0];
3843                     motion_y=s->b_direct_mv_table[xy][1];
3844 #ifdef CONFIG_RISKY
3845                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3846 #endif
3847                     break;
3848                 case MB_TYPE_BIDIR:
3849                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3850                     s->mb_intra= 0;
3851                     motion_x=0;
3852                     motion_y=0;
3853                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3854                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3855                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3856                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3857                     break;
3858                 case MB_TYPE_BACKWARD:
3859                     s->mv_dir = MV_DIR_BACKWARD;
3860                     s->mb_intra= 0;
3861                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3862                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3863                     break;
3864                 case MB_TYPE_FORWARD:
3865                     s->mv_dir = MV_DIR_FORWARD;
3866                     s->mb_intra= 0;
3867                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3868                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3869 //                    printf(" %d %d ", motion_x, motion_y);
3870                     break;
3871                 default:
3872                     motion_x=motion_y=0; //gcc warning fix
3873                     printf("illegal MB type\n");
3874                 }
3875
3876                 encode_mb(s, motion_x, motion_y);
3877
3878                 // RAL: Update last macroblock type
3879                 s->last_mv_dir = s->mv_dir;
3880             
3881                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
3882                     ff_h263_update_motion_val(s);
3883
3884                 MPV_decode_mb(s, s->block);
3885             }
3886
3887             /* clean the MV table in I/P/S-frames for direct mode in B-frames */
3888             if(s->mb_intra /* && I,P,S_TYPE */){
3889                 s->p_mv_table[xy][0]=0;
3890                 s->p_mv_table[xy][1]=0;
3891             }
3892             
3893             if(s->flags&CODEC_FLAG_PSNR){
3894                 int w= 16;
3895                 int h= 16;
3896
3897                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3898                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3899
3900                 s->current_picture_ptr->error[0] += sse(
3901                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3902                     s->dest[0], w, h, s->linesize);
3903                 s->current_picture_ptr->error[1] += sse(
3904                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3905                     s->dest[1], w>>1, h>>1, s->uvlinesize);
3906                 s->current_picture_ptr->error[2] += sse(
3907                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3908                     s->dest[2], w>>1, h>>1, s->uvlinesize);
3909             }
3910 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, get_bit_count(&s->pb));
3911         }
3912     }
3913     emms_c();
3914
3915 #ifdef CONFIG_RISKY
3916     if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
3917         ff_mpeg4_merge_partitions(s);
3918
3919     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
3920         msmpeg4_encode_ext_header(s);
3921
3922     if(s->codec_id==CODEC_ID_MPEG4) 
3923         ff_mpeg4_stuffing(&s->pb);
3924 #endif
3925
3926     //if (s->gob_number)
3927     //    fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
3928     
3929     /* Send the last GOB if RTP */    
3930     if (s->rtp_mode) {
3931         flush_put_bits(&s->pb);
3932         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
3933         /* Call the RTP callback to send the last GOB */
3934         if (s->rtp_callback)
3935             s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number);
3936         s->ptr_lastgob = pbBufPtr(&s->pb);
3937         //fprintf(stderr,"\nGOB: %2d size: %d (last)", s->gob_number, pdif);
3938     }
3939 }
3940
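/**
 * Trellis (rate-distortion optimal) quantization: for each coefficient the
 * quantized level and level-1 are kept as candidates, and a dynamic programming
 * search over run lengths picks the combination that minimizes
 * distortion + lambda * rate, using the codec's VLC length tables.
 */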
3941 static int dct_quantize_trellis_c(MpegEncContext *s, 
3942                         DCTELEM *block, int n,
3943                         int qscale, int *overflow){
3944     const int *qmat;
3945     const uint8_t *scantable= s->intra_scantable.scantable;
3946     int max=0;
3947     unsigned int threshold1, threshold2;
3948     int bias=0;
3949     int run_tab[65];
3950     int level_tab[65];
3951     int score_tab[65];
3952     int last_run=0;
3953     int last_level=0;
3954     int last_score= 0;
3955     int last_i= 0;
3956     int coeff[3][64];
3957     int coeff_count[64];
3958     int lambda, qmul, qadd, start_i, last_non_zero, i;
3959     const int esc_length= s->ac_esc_length;
3960     uint8_t * length;
3961     uint8_t * last_length;
3962     int score_limit=0;
3963     int left_limit= 0;
3964         
3965     s->dsp.fdct (block);
3966
3967     qmul= qscale*16;
3968     qadd= ((qscale-1)|1)*8;
3969
3970     if (s->mb_intra) {
3971         int q;
3972         if (!s->h263_aic) {
3973             if (n < 4)
3974                 q = s->y_dc_scale;
3975             else
3976                 q = s->c_dc_scale;
3977             q = q << 3;
3978         } else{
3979             /* For AIC we skip quant/dequant of INTRADC */
3980             q = 1 << 3;
3981             qadd=0;
3982         }
3983             
3984         /* note: block[0] is assumed to be positive */
3985         block[0] = (block[0] + (q >> 1)) / q;
3986         start_i = 1;
3987         last_non_zero = 0;
3988         qmat = s->q_intra_matrix[qscale];
3989         if(s->mpeg_quant || s->codec_id== CODEC_ID_MPEG1VIDEO)
3990             bias= 1<<(QMAT_SHIFT-1);
3991         length     = s->intra_ac_vlc_length;
3992         last_length= s->intra_ac_vlc_last_length;
3993     } else {
3994         start_i = 0;
3995         last_non_zero = -1;
3996         qmat = s->q_inter_matrix[qscale];
3997         length     = s->inter_ac_vlc_length;
3998         last_length= s->inter_ac_vlc_last_length;
3999     }
4000
4001     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4002     threshold2= (threshold1<<1);
4003
4004     for(i=start_i; i<64; i++) {
4005         const int j = scantable[i];
4006         const int k= i-start_i;
4007         int level = block[j];
4008         level = level * qmat[j];
4009
4010 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
4011 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
4012         if(((unsigned)(level+threshold1))>threshold2){
4013             if(level>0){
4014                 level= (bias + level)>>QMAT_SHIFT;
4015                 coeff[0][k]= level;
4016                 coeff[1][k]= level-1;
4017 //                coeff[2][k]= level-2;
4018             }else{
4019                 level= (bias - level)>>QMAT_SHIFT;
4020                 coeff[0][k]= -level;
4021                 coeff[1][k]= -level+1;
4022 //                coeff[2][k]= -level+2;
4023             }
4024             coeff_count[k]= FFMIN(level, 2);
4025             max |=level;
4026             last_non_zero = i;
4027         }else{
4028             coeff[0][k]= (level>>31)|1;
4029             coeff_count[k]= 1;
4030         }
4031     }
4032     
4033     *overflow= s->max_qcoeff < max; //overflow might have happened
4034     
4035     if(last_non_zero < start_i){
4036         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4037         return last_non_zero;
4038     }
4039
4040     lambda= (qscale*qscale*64*105 + 64)>>7; //FIXME finetune
4041         
4042     score_tab[0]= 0;
4043     for(i=0; i<=last_non_zero - start_i; i++){
4044         int level_index, run, j;
4045         const int dct_coeff= block[ scantable[i + start_i] ];
4046         const int zero_distoration= dct_coeff*dct_coeff;
4047         int best_score=256*256*256*120;
4048
4049         last_score += zero_distoration;
4050         for(level_index=0; level_index < coeff_count[i]; level_index++){
4051             int distoration;
4052             int level= coeff[level_index][i];
4053             int unquant_coeff;
4054             
4055             assert(level);
4056
4057             if(s->out_format == FMT_H263){
4058                 if(level>0){
4059                     unquant_coeff= level*qmul + qadd;
4060                 }else{
4061                     unquant_coeff= level*qmul - qadd;
4062                 }
4063             }else{ //MPEG1
4064                 j= s->dsp.idct_permutation[ scantable[i + start_i] ]; //FIXME optimize
4065                 if(s->mb_intra){
4066                     if (level < 0) {
4067                         unquant_coeff = (int)((-level) * qscale * s->intra_matrix[j]) >> 3;
4068                         unquant_coeff = -((unquant_coeff - 1) | 1);
4069                     } else {
4070                         unquant_coeff = (int)(  level  * qscale * s->intra_matrix[j]) >> 3;
4071                         unquant_coeff =   (unquant_coeff - 1) | 1;
4072                     }
4073                 }else{
4074                     if (level < 0) {
4075                         unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
4076                         unquant_coeff = -((unquant_coeff - 1) | 1);
4077                     } else {
4078                         unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
4079                         unquant_coeff =   (unquant_coeff - 1) | 1;
4080                     }
4081                 }
4082                 unquant_coeff<<= 3;
4083             }
4084
4085             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff);
4086             level+=64;
4087             if((level&(~127)) == 0){
4088                 for(run=0; run<=i - left_limit; run++){
4089                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4090                     score += score_tab[i-run];
4091                     
4092                     if(score < best_score){
4093                         best_score= 
4094                         score_tab[i+1]= score;
4095                         run_tab[i+1]= run;
4096                         level_tab[i+1]= level-64;
4097                     }
4098                 }
4099
4100                 if(s->out_format == FMT_H263){
4101                     for(run=0; run<=i - left_limit; run++){
4102                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4103                         score += score_tab[i-run];
4104                         if(score < last_score){
4105                             last_score= score;
4106                             last_run= run;
4107                             last_level= level-64;
4108                             last_i= i+1;
4109                         }
4110                     }
4111                 }
4112             }else{
4113                 distoration += esc_length*lambda;
4114                 for(run=0; run<=i - left_limit; run++){
4115                     int score= distoration + score_tab[i-run];
4116                     
4117                     if(score < best_score){
4118                         best_score= 
4119                         score_tab[i+1]= score;
4120                         run_tab[i+1]= run;
4121                         level_tab[i+1]= level-64;
4122                     }
4123                 }
4124
4125                 if(s->out_format == FMT_H263){
4126                     for(run=0; run<=i - left_limit; run++){
4127                         int score= distoration + score_tab[i-run];
4128                         if(score < last_score){
4129                             last_score= score;
4130                             last_run= run;
4131                             last_level= level-64;
4132                             last_i= i+1;
4133                         }
4134                     }
4135                 }
4136             }
4137         }
4138
4139         for(j=left_limit; j<=i; j++){
4140             score_tab[j] += zero_distoration;
4141         }
4142         score_limit+= zero_distoration;
4143         if(score_tab[i+1] < score_limit)
4144             score_limit= score_tab[i+1];
4145         
4146         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
4147         while(score_tab[ left_limit ] > score_limit + lambda) left_limit++;
4148     }
4149
4150         //FIXME add some cbp penalty
4151
4152     if(s->out_format != FMT_H263){
4153         last_score= 256*256*256*120;
4154         for(i= left_limit; i<=last_non_zero - start_i + 1; i++){
4155             int score= score_tab[i];
4156             if(i) score += lambda*2; //FIXME more exact?
4157
4158             if(score < last_score){
4159                 last_score= score;
4160                 last_i= i;
4161                 last_level= level_tab[i];
4162                 last_run= run_tab[i];
4163             }
4164         }
4165     }
4166     
4167     last_non_zero= last_i - 1 + start_i;
4168     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4169     
4170     if(last_non_zero < start_i)
4171         return last_non_zero;
4172     
4173     i= last_i;
4174     assert(last_level);
4175 //FIXME use permutated scantable
4176     block[ s->dsp.idct_permutation[ scantable[last_non_zero] ] ]= last_level;
4177     i -= last_run + 1;
4178     
4179     for(;i>0 ; i -= run_tab[i] + 1){
4180         const int j= s->dsp.idct_permutation[ scantable[i - 1 + start_i] ];
4181     
4182         block[j]= level_tab[i];
4183         assert(block[j]);
4184     }
4185
4186     return last_non_zero;
4187 }
4188
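/**
 * Plain (non-trellis) quantization: forward DCT, then each coefficient is scaled
 * by the quantization matrix and rounded with the intra/inter bias; coefficients
 * below the threshold are set to zero.
 */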
4189 static int dct_quantize_c(MpegEncContext *s, 
4190                         DCTELEM *block, int n,
4191                         int qscale, int *overflow)
4192 {
4193     int i, j, level, last_non_zero, q;
4194     const int *qmat;
4195     const uint8_t *scantable= s->intra_scantable.scantable;
4196     int bias;
4197     int max=0;
4198     unsigned int threshold1, threshold2;
4199
4200     s->dsp.fdct (block);
4201
4202     if (s->mb_intra) {
4203         if (!s->h263_aic) {
4204             if (n < 4)
4205                 q = s->y_dc_scale;
4206             else
4207                 q = s->c_dc_scale;
4208             q = q << 3;
4209         } else
4210             /* For AIC we skip quant/dequant of INTRADC */
4211             q = 1 << 3;
4212             
4213         /* note: block[0] is assumed to be positive */
4214         block[0] = (block[0] + (q >> 1)) / q;
4215         i = 1;
4216         last_non_zero = 0;
4217         qmat = s->q_intra_matrix[qscale];
4218         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4219     } else {
4220         i = 0;
4221         last_non_zero = -1;
4222         qmat = s->q_inter_matrix[qscale];
4223         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4224     }
4225     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4226     threshold2= (threshold1<<1);
4227
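    /* quantize the remaining coefficients in scan order; the unsigned compare
     * below is a branch-free test for |level| > threshold1, i.e. whether
     * (bias + |level|) >> QMAT_SHIFT would be non-zero (see the commented-out
     * equivalent condition) */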
4228     for(;i<64;i++) {
4229         j = scantable[i];
4230         level = block[j];
4231         level = level * qmat[j];
4232
4233 //        if(   bias+level >= (1<<QMAT_SHIFT)
4234 //           || bias-level >= (1<<QMAT_SHIFT)){
4235         if(((unsigned)(level+threshold1))>threshold2){
4236             if(level>0){
4237                 level= (bias + level)>>QMAT_SHIFT;
4238                 block[j]= level;
4239             }else{
4240                 level= (bias - level)>>QMAT_SHIFT;
4241                 block[j]= -level;
4242             }
4243             max |=level;
4244             last_non_zero = i;
4245         }else{
4246             block[j]=0;
4247         }
4248     }
4249     *overflow= s->max_qcoeff < max; //overflow might have happened
4250     
4251     /* we need this permutation so that the IDCT is correct; only the !=0 elements are permuted */
4252     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4253         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4254
4255     return last_non_zero;
4256 }
4257
4258 #endif //CONFIG_ENCODERS
4259
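/*
 * MPEG-1 inverse quantization: the intra DC is rescaled with y/c_dc_scale;
 * the other levels are scaled by qscale and the quant matrix and then forced
 * odd ((level - 1) | 1) for mismatch control.
 */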
4260 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
4261                                    DCTELEM *block, int n, int qscale)
4262 {
4263     int i, level, nCoeffs;
4264     const uint16_t *quant_matrix;
4265
4266     nCoeffs= s->block_last_index[n];
4267     
4268     if (s->mb_intra) {
4269         if (n < 4) 
4270             block[0] = block[0] * s->y_dc_scale;
4271         else
4272             block[0] = block[0] * s->c_dc_scale;
4273         /* XXX: only mpeg1 */
4274         quant_matrix = s->intra_matrix;
4275         for(i=1;i<=nCoeffs;i++) {
4276             int j= s->intra_scantable.permutated[i];
4277             level = block[j];
4278             if (level) {
4279                 if (level < 0) {
4280                     level = -level;
4281                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4282                     level = (level - 1) | 1;
4283                     level = -level;
4284                 } else {
4285                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4286                     level = (level - 1) | 1;
4287                 }
4288 #ifdef PARANOID
4289                 if (level < -2048 || level > 2047)
4290                     fprintf(stderr, "unquant error %d %d\n", i, level);
4291 #endif
4292                 block[j] = level;
4293             }
4294         }
4295     } else {
4296         i = 0;
4297         quant_matrix = s->inter_matrix;
4298         for(;i<=nCoeffs;i++) {
4299             int j= s->intra_scantable.permutated[i];
4300             level = block[j];
4301             if (level) {
4302                 if (level < 0) {
4303                     level = -level;
4304                     level = (((level << 1) + 1) * qscale *
4305                              ((int) (quant_matrix[j]))) >> 4;
4306                     level = (level - 1) | 1;
4307                     level = -level;
4308                 } else {
4309                     level = (((level << 1) + 1) * qscale *
4310                              ((int) (quant_matrix[j]))) >> 4;
4311                     level = (level - 1) | 1;
4312                 }
4313 #ifdef PARANOID
4314                 if (level < -2048 || level > 2047)
4315                     fprintf(stderr, "unquant error %d %d\n", i, level);
4316 #endif
4317                 block[j] = level;
4318             }
4319         }
4320     }
4321 }
4322
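/*
 * MPEG-2 inverse quantization: same structure as the MPEG-1 version but
 * without per-coefficient oddification; for non-intra blocks the LSB of
 * block[63] is toggled when needed so that the sum of all reconstructed
 * coefficients comes out odd (mismatch control).
 */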
4323 static void dct_unquantize_mpeg2_c(MpegEncContext *s, 
4324                                    DCTELEM *block, int n, int qscale)
4325 {
4326     int i, level, nCoeffs;
4327     const uint16_t *quant_matrix;
4328
4329     if(s->alternate_scan) nCoeffs= 63;
4330     else nCoeffs= s->block_last_index[n];
4331     
4332     if (s->mb_intra) {
4333         if (n < 4) 
4334             block[0] = block[0] * s->y_dc_scale;
4335         else
4336             block[0] = block[0] * s->c_dc_scale;
4337         quant_matrix = s->intra_matrix;
4338         for(i=1;i<=nCoeffs;i++) {
4339             int j= s->intra_scantable.permutated[i];
4340             level = block[j];
4341             if (level) {
4342                 if (level < 0) {
4343                     level = -level;
4344                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4345                     level = -level;
4346                 } else {
4347                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4348                 }
4349 #ifdef PARANOID
4350                 if (level < -2048 || level > 2047)
4351                     fprintf(stderr, "unquant error %d %d\n", i, level);
4352 #endif
4353                 block[j] = level;
4354             }
4355         }
4356     } else {
4357         int sum=-1;
4358         i = 0;
4359         quant_matrix = s->inter_matrix;
4360         for(;i<=nCoeffs;i++) {
4361             int j= s->intra_scantable.permutated[i];
4362             level = block[j];
4363             if (level) {
4364                 if (level < 0) {
4365                     level = -level;
4366                     level = (((level << 1) + 1) * qscale *
4367                              ((int) (quant_matrix[j]))) >> 4;
4368                     level = -level;
4369                 } else {
4370                     level = (((level << 1) + 1) * qscale *
4371                              ((int) (quant_matrix[j]))) >> 4;
4372                 }
4373 #ifdef PARANOID
4374                 if (level < -2048 || level > 2047)
4375                     fprintf(stderr, "unquant error %d %d\n", i, level);
4376 #endif
4377                 block[j] = level;
4378                 sum+=level;
4379             }
4380         }
4381         block[63]^=sum&1;
4382     }
4383 }
4384
4385
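/*
 * H.263-style inverse quantization:
 * level' = sign(level) * (2*qscale*|level| + qadd), with qadd = (qscale-1)|1;
 * when advanced intra coding (AIC) is used, qadd is dropped and the intra DC
 * is left untouched here.
 */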
4386 static void dct_unquantize_h263_c(MpegEncContext *s, 
4387                                   DCTELEM *block, int n, int qscale)
4388 {
4389     int i, level, qmul, qadd;
4390     int nCoeffs;
4391     
4392     assert(s->block_last_index[n]>=0);
4393     
4394     qadd = (qscale - 1) | 1;
4395     qmul = qscale << 1;
4396     
4397     if (s->mb_intra) {
4398         if (!s->h263_aic) {
4399             if (n < 4) 
4400                 block[0] = block[0] * s->y_dc_scale;
4401             else
4402                 block[0] = block[0] * s->c_dc_scale;
4403         }else
4404             qadd = 0;
4405         i = 1;
4406         nCoeffs= 63; //does not always use the zigzag table
4407     } else {
4408         i = 0;
4409         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
4410     }
4411
4412     for(;i<=nCoeffs;i++) {
4413         level = block[i];
4414         if (level) {
4415             if (level < 0) {
4416                 level = level * qmul - qadd;
4417             } else {
4418                 level = level * qmul + qadd;
4419             }
4420 #ifdef PARANOID
4421                 if (level < -2048 || level > 2047)
4422                     fprintf(stderr, "unquant error %d %d\n", i, level);
4423 #endif
4424             block[i] = level;
4425         }
4426     }
4427 }
4428
4429
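/*
 * Encoder options exposed through the (old-style) AVOption tables.  Each
 * AVOPTION_CODEC_INT/DOUBLE entry appears to take (name, help text, bound
 * context field, min, max, default); AVOPTION_SUB() chains in the common
 * option list and AVOPTION_END() terminates the table.
 */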
4430 static const AVOption mpeg4_options[] =
4431 {
4432     AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
4433     AVOPTION_CODEC_INT("ratetol", "number of bits the bitstream is allowed to diverge from the reference; "
4434                        "the reference can be CBR (for CBR pass1) or VBR (for pass2)",
4435                        bit_rate_tolerance, 4, 240000000, 8000),
4436     AVOPTION_CODEC_INT("qmin", "minimum quantizer", qmin, 1, 31, 2),
4437     AVOPTION_CODEC_INT("qmax", "maximum quantizer", qmax, 1, 31, 31),
4438     AVOPTION_CODEC_STRING("rc_eq", "rate control equation",
4439                           rc_eq, "tex^qComp,option1,options2", 0),
4440     AVOPTION_CODEC_INT("rc_minrate", "rate control minimum bitrate",
4441                        rc_min_rate, 4, 24000000, 0),
4442     AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
4443                        rc_max_rate, 4, 24000000, 0),
4444     AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggressivity",
4445                           rc_buffer_aggressivity, 4, 24000000, 0),
4446     AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
4447                           rc_initial_cplx, 0., 9999999., 0),
4448     AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
4449                           i_quant_factor, 0., 0., 0),
4450     AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
4451                           i_quant_offset, -999999., 999999., 0),
4452     AVOPTION_CODEC_INT("dct_algo", "DCT algorithm",
4453                        dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
4454     AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
4455                           lumi_masking, 0., 999999., 0),
4456     AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporal complexity masking",
4457                           temporal_cplx_masking, 0., 999999., 0),
4458     AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
4459                           spatial_cplx_masking, 0., 999999., 0),
4460     AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
4461                           p_masking, 0., 999999., 0),
4462     AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
4463                           dark_masking, 0., 999999., 0),
4464     AVOPTION_CODEC_INT("idct_algo", "IDCT algorithm",
4465                        idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
4466
4467     AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
4468                        mb_qmin, 0, 8, 0),
4469     AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
4470                        mb_qmax, 0, 8, 0),
4471
4472     AVOPTION_CODEC_INT("me_cmp", "ME compare function",
4473                        me_cmp, 0, 24000000, 0),
4474     AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
4475                        me_sub_cmp, 0, 24000000, 0),
4476
4477
4478     AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
4479                        dia_size, 0, 24000000, 0),
4480     AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors",
4481                        last_predictor_count, 0, 24000000, 0),
4482
4483     AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
4484                        pre_me, 0, 24000000, 0),
4485     AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
4486                        me_pre_cmp, 0, 24000000, 0),
4487
4488     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
4489                        me_range, 0, 24000000, 0),
4490     AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamond size & shape",
4491                        pre_dia_size, 0, 24000000, 0),
4492     AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
4493                        me_subpel_quality, 0, 24000000, 0),
4496     AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
4497                         flags, CODEC_FLAG_PSNR, 0),
4498     AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
4499                               rc_override),
4500     AVOPTION_SUB(avoptions_common),
4501     AVOPTION_END()
4502 };
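
/*
 * A minimal usage sketch (an illustration, not taken from this file): assuming
 * the avoption_parse() helper declared in avcodec.h of this vintage, a table
 * like the one above could be driven from a comma separated option string:
 *
 *     AVCodecContext *avctx = avcodec_alloc_context();
 *     avoption_parse(avctx, mpeg4_options, "bitrate=800000,qmin=2,qmax=31");
 *
 * The exact entry point and signature may differ between revisions.
 */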
4503
4504 #ifdef CONFIG_ENCODERS
4505
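/* The encoders below all share the generic MPV_encode_init/picture/end entry
 * points and differ only in name and codec id; the ones guarded by
 * CONFIG_RISKY are the formats considered legally "risky" (patent concerns). */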
4506 AVCodec mpeg1video_encoder = {
4507     "mpeg1video",
4508     CODEC_TYPE_VIDEO,
4509     CODEC_ID_MPEG1VIDEO,
4510     sizeof(MpegEncContext),
4511     MPV_encode_init,
4512     MPV_encode_picture,
4513     MPV_encode_end,
4514 };
4515
4516 #ifdef CONFIG_RISKY
4517
4518 AVCodec h263_encoder = {
4519     "h263",
4520     CODEC_TYPE_VIDEO,
4521     CODEC_ID_H263,
4522     sizeof(MpegEncContext),
4523     MPV_encode_init,
4524     MPV_encode_picture,
4525     MPV_encode_end,
4526 };
4527
4528 AVCodec h263p_encoder = {
4529     "h263p",
4530     CODEC_TYPE_VIDEO,
4531     CODEC_ID_H263P,
4532     sizeof(MpegEncContext),
4533     MPV_encode_init,
4534     MPV_encode_picture,
4535     MPV_encode_end,
4536 };
4537
4538 AVCodec flv_encoder = {
4539     "flv",
4540     CODEC_TYPE_VIDEO,
4541     CODEC_ID_FLV1,
4542     sizeof(MpegEncContext),
4543     MPV_encode_init,
4544     MPV_encode_picture,
4545     MPV_encode_end,
4546 };
4547
4548 AVCodec rv10_encoder = {
4549     "rv10",
4550     CODEC_TYPE_VIDEO,
4551     CODEC_ID_RV10,
4552     sizeof(MpegEncContext),
4553     MPV_encode_init,
4554     MPV_encode_picture,
4555     MPV_encode_end,
4556 };
4557
4558 AVCodec mpeg4_encoder = {
4559     "mpeg4",
4560     CODEC_TYPE_VIDEO,
4561     CODEC_ID_MPEG4,
4562     sizeof(MpegEncContext),
4563     MPV_encode_init,
4564     MPV_encode_picture,
4565     MPV_encode_end,
4566     .options = mpeg4_options,
4567 };
4568
4569 AVCodec msmpeg4v1_encoder = {
4570     "msmpeg4v1",
4571     CODEC_TYPE_VIDEO,
4572     CODEC_ID_MSMPEG4V1,
4573     sizeof(MpegEncContext),
4574     MPV_encode_init,
4575     MPV_encode_picture,
4576     MPV_encode_end,
4577     .options = mpeg4_options,
4578 };
4579
4580 AVCodec msmpeg4v2_encoder = {
4581     "msmpeg4v2",
4582     CODEC_TYPE_VIDEO,
4583     CODEC_ID_MSMPEG4V2,
4584     sizeof(MpegEncContext),
4585     MPV_encode_init,
4586     MPV_encode_picture,
4587     MPV_encode_end,
4588     .options = mpeg4_options,
4589 };
4590
4591 AVCodec msmpeg4v3_encoder = {
4592     "msmpeg4",
4593     CODEC_TYPE_VIDEO,
4594     CODEC_ID_MSMPEG4V3,
4595     sizeof(MpegEncContext),
4596     MPV_encode_init,
4597     MPV_encode_picture,
4598     MPV_encode_end,
4599     .options = mpeg4_options,
4600 };
4601
4602 AVCodec wmv1_encoder = {
4603     "wmv1",
4604     CODEC_TYPE_VIDEO,
4605     CODEC_ID_WMV1,
4606     sizeof(MpegEncContext),
4607     MPV_encode_init,
4608     MPV_encode_picture,
4609     MPV_encode_end,
4610     .options = mpeg4_options,
4611 };
4612
4613 #endif
4614
4615 AVCodec mjpeg_encoder = {
4616     "mjpeg",
4617     CODEC_TYPE_VIDEO,
4618     CODEC_ID_MJPEG,
4619     sizeof(MpegEncContext),
4620     MPV_encode_init,
4621     MPV_encode_picture,
4622     MPV_encode_end,
4623 };
4624
4625 #endif //CONFIG_ENCODERS
4626