git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18  *
  19  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
  20  */
  21
  22 #include <ctype.h>
  23 #include "avcodec.h"
  24 #include "dsputil.h"
  25 #include "mpegvideo.h"
  26
  27 #ifdef USE_FASTMEMCPY
  28 #include "fastmemcpy.h"
  29 #endif
  30
/* Forward declarations of file-local helpers that are defined further down. */
static void encode_picture(MpegEncContext *s, int picture_number);

/* C reference (de)quantizers; MPV_common_init() installs them as the
   defaults in the context's function pointers. */
static void dct_unquantize_mpeg1_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);

/* Edge replication hook used by MPV_frame_end(); initialised to the plain C
   version. It is a non-static global, presumably so that optimised
   implementations can replace it -- TODO confirm against the arch-specific
   init code. */
void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;
static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h);
  44
  45 #define EDGE_WIDTH 16
  46
  47 /* enable all paranoid tests for rounding, overflows, etc... */
  48 //#define PARANOID
  49
  50 //#define DEBUG
  51
  52
  53 /* for jpeg fast DCT */
  54 #define CONST_BITS 14
  55
/* Per-coefficient scale factors in fixed point with 14 fractional bits
   (16384 == 1 << 14). convert_matrix() multiplies them into the divisor of
   the quantizer tables when the fdct_ifast transform is selected and
   compensates by shifting the numerator 14 bits further. */
static const unsigned short aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
  67
/* 16-entry rounding lookup table. Judging by the name it is presumably used
   when deriving H.263 chroma motion vectors; its users are outside this part
   of the file -- TODO confirm. */
static UINT8 h263_chroma_roundtab[16] = {
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};

/* Fallback motion-vector tables. MPV_encode_init() fills them once (penalty
   all zero, fcode 1 for the 32 entries at offsets -16..15 around MAX_MV) and
   points s->mv_penalty / s->fcode_tab at them before the codec-specific
   encoder init runs. */
static UINT16 default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
static UINT8 default_fcode_tab[MAX_MV*2+1];

/* Defined in another translation unit. */
extern UINT8 zigzag_end[64];

/* default motion estimation: MPV_encode_init() uses this value when
   avctx->me_method is 0 */
int motion_estimation_method = ME_EPZS;
  79
  80 static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
  81                            const UINT16 *quant_matrix, int bias)
  82 {
  83     int qscale;
  84
  85     for(qscale=1; qscale<32; qscale++){
  86         int i;
  87         if (s->fdct == ff_jpeg_fdct_islow) {
  88             for(i=0;i<64;i++) {
  89                 const int j= block_permute_op(i);
  90                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
  91                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
  92                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
  93                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
  94
  95                 qmat[qscale][j] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  96                                 (qscale * quant_matrix[j]));
  97             }
  98         } else if (s->fdct == fdct_ifast) {
  99             for(i=0;i<64;i++) {
 100                 const int j= block_permute_op(i);
 101                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
 102                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
 103                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
 104                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
 105
 106                 qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 107                                 (aanscales[i] * qscale * quant_matrix[j]));
 108             }
 109         } else {
 110             for(i=0;i<64;i++) {
 111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 112                    So 16           <= qscale * quant_matrix[i]             <= 7905
 113                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
 114                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
 115                 */
 116                 qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
 117                 qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]);
 118
 119                 if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
 120                 qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
 121             }
 122         }
 123     }
 124 }
// move into common.c perhaps
/* Allocate `size` zeroed bytes into the lvalue `p` via av_mallocz(); on
   failure report through perror() and jump to a label named `fail`, which the
   enclosing function has to provide.
   The expansion is a bare { } block rather than do { } while(0): several call
   sites in this file omit the trailing semicolon and depend on that. */
#define CHECKED_ALLOCZ(p, size)\
{\
    p= av_mallocz(size);\
    if(p==NULL){\
        perror("malloc");\
        goto fail;\
    }\
}
 134
/* init common structure for both encoder and decoder
 *
 * Installs the default C (de)quantizers and forward DCT, derives the
 * macroblock geometry from s->width / s->height and allocates every table and
 * picture buffer the context needs.
 * The following fields are read, so they must have been set by the caller:
 * s->avctx, s->width, s->height, s->flags, s->encoding, s->codec_id,
 * s->out_format, s->has_b_frames, s->max_b_frames, s->msmpeg4_version,
 * s->h263_pred and s->h263_plus.
 *
 * Returns 0 on success. If any allocation fails, everything allocated so far
 * is released through MPV_common_end() and -1 is returned.
 */
int MPV_common_init(MpegEncContext *s)
{
    int c_size, i;
    UINT8 *pict;

    s->dct_unquantize_h263 = dct_unquantize_h263_c;
    s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
    s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
    s->dct_quantize= dct_quantize_c;

    if(s->avctx->dct_algo==FF_DCT_FASTINT)
        s->fdct = fdct_ifast;
    else
        s->fdct = ff_jpeg_fdct_islow;

    /* architecture specific init; runs after the C defaults were installed,
       presumably to override some of them -- TODO confirm */
#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif

    /* picture size in 16x16 macroblocks, rounded up */
    s->mb_width = (s->width + 15) / 16;
    s->mb_height = (s->height + 15) / 16;

    /* set default edge pos, will be overridden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    /* convert fourcc to upper case */
    s->avctx->fourcc=   toupper( s->avctx->fourcc     &0xFF)
                     + (toupper((s->avctx->fourcc>>8 )&0xFF)<<8 )
                     + (toupper((s->avctx->fourcc>>16)&0xFF)<<16)
                     + (toupper((s->avctx->fourcc>>24)&0xFF)<<24);

    s->mb_num = s->mb_width * s->mb_height;
    /* internal picture buffers are only allocated when CODEC_FLAG_DR1 is not
       set; with the flag set MPV_frame_start() takes buffers and strides from
       avctx instead */
    if(!(s->flags&CODEC_FLAG_DR1)){
      /* strides leave room for an EDGE_WIDTH border on both sides of the luma plane */
      s->linesize   = s->mb_width * 16 + 2 * EDGE_WIDTH;
      s->uvlinesize = s->mb_width * 8  +     EDGE_WIDTH;

      for(i=0;i<3;i++) {
        int w, h, shift, pict_start;

        w = s->linesize; /* assigned but not used in this loop */
        h = s->mb_height * 16 + 2 * EDGE_WIDTH;
        shift = (i == 0) ? 0 : 1; /* planes 1 and 2 are half size in both directions */
        c_size = (s->linesize>>shift) * (h >> shift);
        /* offset of the first visible pixel: skip the top border rows and the left border */
        pict_start = (s->linesize>>shift) * (EDGE_WIDTH >> shift) + (EDGE_WIDTH >> shift);

        /* planes 1 and 2 are filled with 128, plane 0 stays zeroed */
        CHECKED_ALLOCZ(pict, c_size)
        s->last_picture_base[i] = pict;
        s->last_picture[i] = pict + pict_start;
        if(i>0) memset(s->last_picture_base[i], 128, c_size);

        CHECKED_ALLOCZ(pict, c_size)
        s->next_picture_base[i] = pict;
        s->next_picture[i] = pict + pict_start;
        if(i>0) memset(s->next_picture_base[i], 128, c_size);

        if (s->has_b_frames || s->codec_id==CODEC_ID_MPEG4) {
        /* Note: MPEG4 is included here because buggy encoders do not set the low_delay flag but
           still do low-delay encoding, so b-frame streams cannot always be told apart from low_delay streams */
            CHECKED_ALLOCZ(pict, c_size)
            s->aux_picture_base[i] = pict;
            s->aux_picture[i] = pict + pict_start;
            if(i>0) memset(s->aux_picture_base[i], 128, c_size);
        }
      }
      s->ip_buffer_count= 2;
    }

    CHECKED_ALLOCZ(s->edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance

    if (s->encoding) {
        int j;
        /* one extra macroblock of border on every side */
        int mv_table_size= (s->mb_width+2)*(s->mb_height+2);

        /* per-macroblock statistics */
        CHECKED_ALLOCZ(s->mb_var   , s->mb_num * sizeof(INT16))
        CHECKED_ALLOCZ(s->mc_mb_var, s->mb_num * sizeof(INT16))
        CHECKED_ALLOCZ(s->mb_mean  , s->mb_num * sizeof(INT8))

        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table            , mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_forw_mv_table       , mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_back_mv_table       , mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table , mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table , mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_direct_forw_mv_table, mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_direct_back_mv_table, mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_direct_mv_table     , mv_table_size * 2 * sizeof(INT16))

        /* NOTE(review): s->linesize is only assigned above when CODEC_FLAG_DR1
           is not set; confirm what it holds here for a DR1 encoder */
        CHECKED_ALLOCZ(s->me_scratchpad,  s->linesize*16*3*sizeof(uint8_t))

        CHECKED_ALLOCZ(s->me_map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me_score_map, ME_MAP_SIZE*sizeof(uint16_t))

        /* input reorder buffers, only needed when b frames are enabled;
           these have no border rows (height is mb_height*16) */
        if(s->max_b_frames){
            for(j=0; j<REORDER_BUFFER_SIZE; j++){
                int i;
                for(i=0;i<3;i++) {
                    int w, h, shift;

                    w = s->linesize;
                    h = s->mb_height * 16;
                    shift = (i == 0) ? 0 : 1;
                    c_size = (w >> shift) * (h >> shift);

                    CHECKED_ALLOCZ(pict, c_size);
                    s->picture_buffer[j][i] = pict;
                }
            }
        }

        if(s->codec_id==CODEC_ID_MPEG4){
            CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
            CHECKED_ALLOCZ(   s->pb2_buffer, PB_BUFFER_SIZE);
        }

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);
    }

    if (s->out_format == FMT_H263 || s->encoding) {
        int size;
        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , s->mb_num * sizeof(UINT8))

        /* MV prediction: two entries per macroblock in each direction plus a
           border, two INT16 per entry */
        size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
        CHECKED_ALLOCZ(s->motion_val, size * 2 * sizeof(INT16));
    }

    if(s->codec_id==CODEC_ID_MPEG4){
        /* 4mv and interlaced direct mode decoding tables */
        CHECKED_ALLOCZ(s->co_located_type_table, s->mb_num * sizeof(UINT8))
        CHECKED_ALLOCZ(s->field_mv_table, s->mb_num*2*2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->field_select_table, s->mb_num*2* sizeof(INT8))
    }

    if (s->h263_pred || s->h263_plus) {
        /* these locals shadow the function-level c_size and i */
        int y_size, c_size, i, size;

        /* dc values */

        /* one allocation holds plane 0 followed by planes 1 and 2 */
        y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
        c_size = (s->mb_width + 2) * (s->mb_height + 2);
        size = y_size + 2 * c_size;
        CHECKED_ALLOCZ(s->dc_val[0], size * sizeof(INT16));
        s->dc_val[1] = s->dc_val[0] + y_size;
        s->dc_val[2] = s->dc_val[1] + c_size;
        /* every dc predictor starts at 1024 */
        for(i=0;i<size;i++)
            s->dc_val[0][i] = 1024;

        /* ac values */
        CHECKED_ALLOCZ(s->ac_val[0], size * sizeof(INT16) * 16);
        s->ac_val[1] = s->ac_val[0] + y_size;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block, y_size);

        /* divx501 bitstream reorder buffer */
        CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , s->mb_num * sizeof(UINT8))
        CHECKED_ALLOCZ(s->pred_dir_table, s->mb_num * sizeof(UINT8))
    }
    CHECKED_ALLOCZ(s->qscale_table  , s->mb_num * sizeof(UINT8))

    /* which mb is a intra block; every entry starts out as 1 */
    CHECKED_ALLOCZ(s->mbintra_table, s->mb_num);
    memset(s->mbintra_table, 1, s->mb_num);

    /* default structure is frame */
    s->picture_structure = PICT_FRAME;

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, s->mb_num);

    s->block= s->blocks[0];

    s->context_initialized = 1;
    return 0;
    /* target of CHECKED_ALLOCZ: MPV_common_end() frees whatever was allocated
       before the failing call */
 fail:
    MPV_common_end(s);
    return -1;
}
 329
 330
 331 //extern int sads;
 332
 333 /* init common structure for both encoder and decoder */
 334 void MPV_common_end(MpegEncContext *s)
 335 {
 336     int i;
 337
 338     av_freep(&s->mb_type);
 339     av_freep(&s->mb_var);
 340     av_freep(&s->mc_mb_var);
 341     av_freep(&s->mb_mean);
 342     av_freep(&s->p_mv_table);
 343     av_freep(&s->b_forw_mv_table);
 344     av_freep(&s->b_back_mv_table);
 345     av_freep(&s->b_bidir_forw_mv_table);
 346     av_freep(&s->b_bidir_back_mv_table);
 347     av_freep(&s->b_direct_forw_mv_table);
 348     av_freep(&s->b_direct_back_mv_table);
 349     av_freep(&s->b_direct_mv_table);
 350     av_freep(&s->motion_val);
 351     av_freep(&s->dc_val[0]);
 352     av_freep(&s->ac_val[0]);
 353     av_freep(&s->coded_block);
 354     av_freep(&s->mbintra_table);
 355     av_freep(&s->cbp_table);
 356     av_freep(&s->pred_dir_table);
 357     av_freep(&s->qscale_table);
 358     av_freep(&s->me_scratchpad);
 359     av_freep(&s->me_map);
 360     av_freep(&s->me_score_map);
 361
 362     av_freep(&s->mbskip_table);
 363     av_freep(&s->bitstream_buffer);
 364     av_freep(&s->tex_pb_buffer);
 365     av_freep(&s->pb2_buffer);
 366     av_freep(&s->edge_emu_buffer);
 367     av_freep(&s->co_located_type_table);
 368     av_freep(&s->field_mv_table);
 369     av_freep(&s->field_select_table);
 370     av_freep(&s->avctx->stats_out);
 371     av_freep(&s->ac_stats);
 372
 373     for(i=0;i<3;i++) {
 374         int j;
 375         if(!(s->flags&CODEC_FLAG_DR1)){
 376             av_freep(&s->last_picture_base[i]);
 377             av_freep(&s->next_picture_base[i]);
 378             av_freep(&s->aux_picture_base[i]);
 379         }
 380         s->last_picture_base[i]=
 381         s->next_picture_base[i]=
 382         s->aux_picture_base [i] = NULL;
 383         s->last_picture[i]=
 384         s->next_picture[i]=
 385         s->aux_picture [i] = NULL;
 386
 387         for(j=0; j<REORDER_BUFFER_SIZE; j++){
 388             av_freep(&s->picture_buffer[j][i]);
 389         }
 390     }
 391     s->context_initialized = 0;
 392 }
 393
 394 /* init video encoder */
 395 int MPV_encode_init(AVCodecContext *avctx)
 396 {
 397     MpegEncContext *s = avctx->priv_data;
 398     int i;
 399
 400     avctx->pix_fmt = PIX_FMT_YUV420P;
 401
 402     s->bit_rate = avctx->bit_rate;
 403     s->bit_rate_tolerance = avctx->bit_rate_tolerance;
 404     s->frame_rate = avctx->frame_rate;
 405     s->width = avctx->width;
 406     s->height = avctx->height;
 407     if(avctx->gop_size > 600){
 408         fprintf(stderr, "Warning keyframe interval too large! reducing it ...\n");
 409         avctx->gop_size=600;
 410     }
 411     s->gop_size = avctx->gop_size;
 412     s->rtp_mode = avctx->rtp_mode;
 413     s->rtp_payload_size = avctx->rtp_payload_size;
 414     if (avctx->rtp_callback)
 415         s->rtp_callback = avctx->rtp_callback;
 416     s->qmin= avctx->qmin;
 417     s->qmax= avctx->qmax;
 418     s->max_qdiff= avctx->max_qdiff;
 419     s->qcompress= avctx->qcompress;
 420     s->qblur= avctx->qblur;
 421     s->avctx = avctx;
 422     s->aspect_ratio_info= avctx->aspect_ratio_info;
 423     if (avctx->aspect_ratio_info == FF_ASPECT_EXTENDED)
 424     {
 425         s->aspected_width = avctx->aspected_width;
 426         s->aspected_height = avctx->aspected_height;
 427     }
 428     s->flags= avctx->flags;
 429     s->max_b_frames= avctx->max_b_frames;
 430     s->b_frame_strategy= avctx->b_frame_strategy;
 431     s->codec_id= avctx->codec->id;
 432     s->luma_elim_threshold  = avctx->luma_elim_threshold;
 433     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
 434     s->strict_std_compliance= avctx->strict_std_compliance;
 435     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
 436     s->mpeg_quant= avctx->mpeg_quant;
 437
 438     if (s->gop_size <= 1) {
 439         s->intra_only = 1;
 440         s->gop_size = 12;
 441     } else {
 442         s->intra_only = 0;
 443     }
 444
 445     /* ME algorithm */
 446     if (avctx->me_method == 0)
 447         /* For compatibility */
 448         s->me_method = motion_estimation_method;
 449     else
 450         s->me_method = avctx->me_method;
 451
 452     /* Fixed QSCALE */
 453     s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
 454
 455     s->adaptive_quant= (   s->avctx->lumi_masking
 456                         || s->avctx->dark_masking
 457                         || s->avctx->temporal_cplx_masking
 458                         || s->avctx->spatial_cplx_masking
 459                         || s->avctx->p_masking)
 460                        && !s->fixed_qscale;
 461
 462     s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
 463
 464     switch(avctx->codec->id) {
 465     case CODEC_ID_MPEG1VIDEO:
 466         s->out_format = FMT_MPEG1;
 467         avctx->delay=0; //FIXME not sure, should check the spec
 468         break;
 469     case CODEC_ID_MJPEG:
 470         s->out_format = FMT_MJPEG;
 471         s->intra_only = 1; /* force intra only for jpeg */
 472         s->mjpeg_write_tables = 1; /* write all tables */
 473         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
 474         s->mjpeg_vsample[0] = 2; /* set up default sampling factors */
 475         s->mjpeg_vsample[1] = 1; /* the only currently supported values */
 476         s->mjpeg_vsample[2] = 1;
 477         s->mjpeg_hsample[0] = 2;
 478         s->mjpeg_hsample[1] = 1;
 479         s->mjpeg_hsample[2] = 1;
 480         if (mjpeg_init(s) < 0)
 481             return -1;
 482         avctx->delay=0;
 483         break;
 484     case CODEC_ID_H263:
 485         if (h263_get_picture_format(s->width, s->height) == 7) {
 486             printf("Input picture size isn't suitable for h263 codec! try h263+\n");
 487             return -1;
 488         }
 489         s->out_format = FMT_H263;
 490         avctx->delay=0;
 491         break;
 492     case CODEC_ID_H263P:
 493         s->out_format = FMT_H263;
 494         s->rtp_mode = 1;
 495         s->rtp_payload_size = 1200;
 496         s->h263_plus = 1;
 497         s->unrestricted_mv = 1;
 498         s->h263_aic = 1;
 499
 500         /* These are just to be sure */
 501         s->umvplus = 0;
 502         s->umvplus_dec = 0;
 503         avctx->delay=0;
 504         break;
 505     case CODEC_ID_RV10:
 506         s->out_format = FMT_H263;
 507         s->h263_rv10 = 1;
 508         avctx->delay=0;
 509         break;
 510     case CODEC_ID_MPEG4:
 511         s->out_format = FMT_H263;
 512         s->h263_pred = 1;
 513         s->unrestricted_mv = 1;
 514         s->has_b_frames= s->max_b_frames ? 1 : 0;
 515         s->low_delay=0;
 516         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
 517         break;
 518     case CODEC_ID_MSMPEG4V1:
 519         s->out_format = FMT_H263;
 520         s->h263_msmpeg4 = 1;
 521         s->h263_pred = 1;
 522         s->unrestricted_mv = 1;
 523         s->msmpeg4_version= 1;
 524         avctx->delay=0;
 525         break;
 526     case CODEC_ID_MSMPEG4V2:
 527         s->out_format = FMT_H263;
 528         s->h263_msmpeg4 = 1;
 529         s->h263_pred = 1;
 530         s->unrestricted_mv = 1;
 531         s->msmpeg4_version= 2;
 532         avctx->delay=0;
 533         break;
 534     case CODEC_ID_MSMPEG4V3:
 535         s->out_format = FMT_H263;
 536         s->h263_msmpeg4 = 1;
 537         s->h263_pred = 1;
 538         s->unrestricted_mv = 1;
 539         s->msmpeg4_version= 3;
 540         avctx->delay=0;
 541         break;
 542     case CODEC_ID_WMV1:
 543         s->out_format = FMT_H263;
 544         s->h263_msmpeg4 = 1;
 545         s->h263_pred = 1;
 546         s->unrestricted_mv = 1;
 547         s->msmpeg4_version= 4;
 548         avctx->delay=0;
 549         break;
 550     case CODEC_ID_WMV2:
 551         s->out_format = FMT_H263;
 552         s->h263_msmpeg4 = 1;
 553         s->h263_pred = 1;
 554         s->unrestricted_mv = 1;
 555         s->msmpeg4_version= 5;
 556         avctx->delay=0;
 557         break;
 558     default:
 559         return -1;
 560     }
 561
 562     { /* set up some save defaults, some codecs might override them later */
 563         static int done=0;
 564         if(!done){
 565             int i;
 566             done=1;
 567             memset(default_mv_penalty, 0, sizeof(UINT16)*(MAX_FCODE+1)*(2*MAX_MV+1));
 568             memset(default_fcode_tab , 0, sizeof(UINT8)*(2*MAX_MV+1));
 569
 570             for(i=-16; i<16; i++){
 571                 default_fcode_tab[i + MAX_MV]= 1;
 572             }
 573         }
 574     }
 575     s->mv_penalty= default_mv_penalty;
 576     s->fcode_tab= default_fcode_tab;
 577     s->y_dc_scale_table=
 578     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
 579
 580     if (s->out_format == FMT_H263)
 581         h263_encode_init(s);
 582     else if (s->out_format == FMT_MPEG1)
 583         ff_mpeg1_encode_init(s);
 584     if(s->msmpeg4_version)
 585         ff_msmpeg4_encode_init(s);
 586
 587     /* dont use mv_penalty table for crap MV as it would be confused */
 588     if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty;
 589
 590     s->encoding = 1;
 591
 592     /* init */
 593     if (MPV_common_init(s) < 0)
 594         return -1;
 595
 596     /* init default q matrix */
 597     for(i=0;i<64;i++) {
 598         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
 599             s->intra_matrix[i] = ff_mpeg4_default_intra_matrix[i];
 600             s->inter_matrix[i] = ff_mpeg4_default_non_intra_matrix[i];
 601         }else if(s->out_format == FMT_H263){
 602             s->intra_matrix[i] =
 603             s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i];
 604         }else{ /* mpeg1 */
 605             s->intra_matrix[i] = ff_mpeg1_default_intra_matrix[i];
 606             s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i];
 607         }
 608     }
 609
 610     /* precompute matrix */
 611     /* for mjpeg, we do include qscale in the matrix */
 612     if (s->out_format != FMT_MJPEG) {
 613         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias,
 614                        s->intra_matrix, s->intra_quant_bias);
 615         convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias,
 616                        s->inter_matrix, s->inter_quant_bias);
 617     }
 618
 619     if(ff_rate_control_init(s) < 0)
 620         return -1;
 621
 622     s->picture_number = 0;
 623     s->picture_in_gop_number = 0;
 624     s->fake_picture_number = 0;
 625     /* motion detector init */
 626     s->f_code = 1;
 627     s->b_code = 1;
 628
 629     return 0;
 630 }
 631
 632 int MPV_encode_end(AVCodecContext *avctx)
 633 {
 634     MpegEncContext *s = avctx->priv_data;
 635
 636 #ifdef STATS
 637     print_stats();
 638 #endif
 639
 640     ff_rate_control_uninit(s);
 641
 642     MPV_common_end(s);
 643     if (s->out_format == FMT_MJPEG)
 644         mjpeg_close(s);
 645
 646     return 0;
 647 }
 648
 649 /* draw the edges of width 'w' of an image of size width, height */
 650 //FIXME check that this is ok for mpeg4 interlaced
 651 static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w)
 652 {
 653     UINT8 *ptr, *last_line;
 654     int i;
 655
 656     last_line = buf + (height - 1) * wrap;
 657     for(i=0;i<w;i++) {
 658         /* top and bottom */
 659         memcpy(buf - (i + 1) * wrap, buf, width);
 660         memcpy(last_line + (i + 1) * wrap, last_line, width);
 661     }
 662     /* left and right */
 663     ptr = buf;
 664     for(i=0;i<height;i++) {
 665         memset(ptr - w, ptr[0], w);
 666         memset(ptr + width, ptr[width-1], w);
 667         ptr += wrap;
 668     }
 669     /* corners */
 670     for(i=0;i<w;i++) {
 671         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
 672         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
 673         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */
 674         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */
 675     }
 676 }
 677
/* generic function for encode/decode called before a frame is coded/decoded
 *
 * Resets the per-frame flags, obtains the output buffer when the caller
 * manages buffers (CODEC_FLAG_DR1), selects current/last/next picture
 * according to s->pict_type and picks the dequantizer for this frame.
 */
void MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    UINT8 *tmp;

    s->mb_skiped = 0;
    s->decoding_error=0;
    avctx->mbskip_table= s->mbskip_table;

    if(avctx->flags&CODEC_FLAG_DR1){
        /* the callback's return value, if it has one, is not checked here */
        avctx->get_buffer_callback(avctx, s->width, s->height, s->pict_type);

        /* with DR1 the strides and the buffer count come from the caller */
        s->linesize  = avctx->dr_stride;
        s->uvlinesize= avctx->dr_uvstride;
        s->ip_buffer_count= avctx->dr_ip_buffer_count;
    }
    avctx->dr_ip_buffer_count= s->ip_buffer_count;

    if (s->pict_type == B_TYPE) {
        /* B frames go to the aux picture and leave last/next untouched */
        for(i=0;i<3;i++) {
            if(avctx->flags&CODEC_FLAG_DR1)
                s->aux_picture[i]= avctx->dr_buffer[i];

            //FIXME the following should never be needed, the decoder should drop b frames if no reference is available
            if(s->next_picture[i]==NULL)
                s->next_picture[i]= s->aux_picture[i];
            if(s->last_picture[i]==NULL)
                s->last_picture[i]= s->next_picture[i];

            s->current_picture[i] = s->aux_picture[i];
        }
    } else {
        for(i=0;i<3;i++) {
            /* swap next and last: the old "next" becomes "last", the new
               frame is written into the old "last" buffer (or into the
               caller's buffer with DR1) */
            if(avctx->flags&CODEC_FLAG_DR1)
                tmp= avctx->dr_buffer[i];
            else
                tmp = s->last_picture[i];

            s->last_picture[i] = s->next_picture[i];
            s->next_picture[i] = tmp;
            s->current_picture[i] = tmp;

            /* no previous reference yet: fall back to the current buffer */
            if(s->last_picture[i]==NULL)
                s->last_picture[i]= s->next_picture[i];

            /* NOTE(review): this opaque-frame rotation does not depend on i
               but sits inside the per-plane loop, so it runs three times per
               frame -- confirm whether that is intended */
            s->last_dr_opaque= s->next_dr_opaque;
            s->next_dr_opaque= avctx->dr_opaque_frame;

            if(s->has_b_frames && s->last_dr_opaque && s->codec_id!=CODEC_ID_SVQ1)
                avctx->dr_opaque_frame= s->last_dr_opaque;
            else
                avctx->dr_opaque_frame= s->next_dr_opaque;
        }
    }
    /* set dequantizer, we cant do it during init as it might change for mpeg4
       and we cant do it in the header decode as init isnt called for mpeg4 there yet */
    if(s->out_format == FMT_H263){
        if(s->mpeg_quant)
            s->dct_unquantize = s->dct_unquantize_mpeg2;
        else
            s->dct_unquantize = s->dct_unquantize_h263;
    }else
        s->dct_unquantize = s->dct_unquantize_mpeg1;
}
 744
 745 /* generic function for encode/decode called after a frame has been coded/decoded */
 746 void MPV_frame_end(MpegEncContext *s)
 747 {
 748     s->avctx->key_frame   = (s->pict_type == I_TYPE);
 749     s->avctx->pict_type   = s->pict_type;
 750
 751     /* draw edge for correct motion prediction if outside */
 752     if (s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
 753         draw_edges(s->current_picture[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
 754         draw_edges(s->current_picture[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
 755         draw_edges(s->current_picture[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
 756     }
 757     emms_c();
 758
 759     s->last_pict_type    = s->pict_type;
 760     if(s->pict_type!=B_TYPE){
 761         s->last_non_b_pict_type= s->pict_type;
 762         s->num_available_buffers++;
 763         if(s->num_available_buffers>2) s->num_available_buffers= 2;
 764     }
 765 }
 766
 767 /* Reorder input for encoding.
      *
      * Shifts the s->coded_order[] queue down by one entry, stores the incoming
      * picture in the slot selected from s->input_pict_type, tags that slot with
      * the pending input_* fields of s, and finally points s->new_picture[] at
      * the planes of slot 0.
      *
      * The picture is stored by reference when it lands in slot 0 or
      * CODEC_FLAG_INPUT_PRESERVED is set, provided its strides equal the
      * encoder strides; otherwise its three planes are copied into an internal
      * buffer (planes 1 and 2 at half width and half height).
      *
      * s:    encoder context; coded_order[], b_frames_since_non_b,
      *       picture_buffer_index and new_picture[] are updated
      * pict: input picture (data[] and linesize[] of three planes)
      */
 768 void reorder_input(MpegEncContext *s, AVPicture *pict)
 769 {
 770     int i, j, index;
 771
 772     if(s->max_b_frames > FF_MAX_B_FRAMES) s->max_b_frames= FF_MAX_B_FRAMES;
 773
 774 //        delay= s->max_b_frames+1; (or 0 if no b frames cuz decoder diff)
 775
         /* advance the queue by one entry; the freed last slot is cleared below */
 776     for(j=0; j<REORDER_BUFFER_SIZE-1; j++){
 777         s->coded_order[j]= s->coded_order[j+1];
 778     }
 779     s->coded_order[j].picture[0]= s->coded_order[j].picture[1]= s->coded_order[j].picture[2]= NULL; //catch uninitialized buffers
 780     s->coded_order[j].pict_type=0;
 781
         /* A B picture always enters at slot max_b_frames+1. Any other picture
          * enters b_frames_since_non_b slots earlier than max_b_frames, i.e.
          * directly ahead of the B pictures queued since the previous non-B
          * picture.
          * NOTE(review): nothing here keeps index >= 0 if b_frames_since_non_b
          * ever exceeds max_b_frames (forced picture types?) - confirm callers. */
 782     switch(s->input_pict_type){
 783     default:
 784     case I_TYPE:
 785     case S_TYPE:
 786     case P_TYPE:
 787         index= s->max_b_frames - s->b_frames_since_non_b;
 788         s->b_frames_since_non_b=0;
 789         break;
 790     case B_TYPE:
 791         index= s->max_b_frames + 1;
 792         s->b_frames_since_non_b++;
 793         break;
 794     }
 795 //printf("index:%d type:%d strides: %d %d\n", index, s->input_pict_type, pict->linesize[0], s->linesize);
 796     if(   (index==0 || (s->flags&CODEC_FLAG_INPUT_PRESERVED))
 797        && pict->linesize[0] == s->linesize
 798        && pict->linesize[1] == s->uvlinesize
 799        && pict->linesize[2] == s->uvlinesize){
 800 //printf("ptr\n");
             /* strides match: reference the caller's planes directly */
 801         for(i=0; i<3; i++){
 802             s->coded_order[index].picture[i]= pict->data[i];
 803         }
 804     }else{
 805 //printf("copy\n");
 806         for(i=0; i<3; i++){
 807             uint8_t *src = pict->data[i];
 808             uint8_t *dest;
 809             int src_wrap = pict->linesize[i];
                 /* planes 1 and 2 use the chroma stride, consistent with the
                    stride test above (was: s->linesize >> 1) */
 810             int dest_wrap = i ? s->uvlinesize : s->linesize;
 811             int w = s->width;
 812             int h = s->height;
 813
 814             if(index==0) dest= s->last_picture[i]+16; //is current_picture indeed but the switch happens after reordering
 815             else         dest= s->picture_buffer[s->picture_buffer_index][i];
 816
                 /* planes 1 and 2 are copied at half width and half height */
 817             if (i >= 1) {
 819                 w >>= 1;
 820                 h >>= 1;
 821             }
 822
 823             s->coded_order[index].picture[i]= dest;
 824             for(j=0;j<h;j++) {
 825                 memcpy(dest, src, w);
 826                 dest += dest_wrap;
 827                 src += src_wrap;
 828             }
 829         }
             /* a picture_buffer entry was consumed: move on to the next one, wrapping around */
 830         if(index!=0){
 831             s->picture_buffer_index++;
 832             if(s->picture_buffer_index >= REORDER_BUFFER_SIZE) s->picture_buffer_index=0;
 833         }
 834     }
         /* tag the slot with the properties of the picture just stored */
 835     s->coded_order[index].pict_type = s->input_pict_type;
 836     s->coded_order[index].qscale    = s->input_qscale;
 837     s->coded_order[index].force_type= s->force_input_type;
 838     s->coded_order[index].picture_in_gop_number= s->input_picture_in_gop_number;
 839     s->coded_order[index].picture_number= s->input_picture_number;
 840
         /* slot 0 is the picture to encode next (its planes may still be NULL) */
 841     for(i=0; i<3; i++){
 842         s->new_picture[i]= s->coded_order[0].picture[i];
 843     }
 844 }
 845
 846 int MPV_encode_picture(AVCodecContext *avctx,
 847                        unsigned char *buf, int buf_size, void *data)
 848 {
         /* Encoder entry point for one input picture.
          *
          * Chooses the type of the incoming picture, queues it through
          * reorder_input() and, if the head of the coded-order queue holds a
          * picture, encodes that picture into buf.
          *
          * avctx:   codec context; priv_data is the MpegEncContext. Bit and
          *          macroblock statistics of the coded picture are written back.
          * buf:     output buffer of buf_size bytes
          * data:    input picture, used as an AVPicture
          * returns: distance in bytes between the start of the bitstream buffer
          *          and the write pointer after flushing
          */
 849     MpegEncContext *s = avctx->priv_data;
 850     AVPicture *pict = data;
 851
 852     s->input_qscale = avctx->quality;
 853
 854     init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
 855
         /* Picture type of the incoming picture:
          *  - CODEC_FLAG_TYPE: forced by the caller through avctx->key_frame
          *  - CODEC_FLAG_PASS2: forced from the rate control entry of this picture
          *  - otherwise chosen here from the GOP position and B-frame settings */
 856     if(avctx->flags&CODEC_FLAG_TYPE){
 857         s->input_pict_type=
 858         s->force_input_type= avctx->key_frame ? I_TYPE : P_TYPE;
 859     }else if(s->flags&CODEC_FLAG_PASS2){
 860         s->input_pict_type=
 861         s->force_input_type= s->rc_context.entry[s->input_picture_number].new_pict_type;
 862     }else{
 863         s->force_input_type=0;
 864         if (!s->intra_only) {
 865             /* first picture of GOP is intra */
 866             if (s->input_picture_in_gop_number % s->gop_size==0){
 867                 s->input_pict_type = I_TYPE;
 868             }else if(s->max_b_frames==0){
 869                 s->input_pict_type = P_TYPE;
 870             }else{
                     /* B pictures until max_b_frames of them follow the last non-B picture, then a P picture */
 871                 if(s->b_frames_since_non_b < s->max_b_frames) //FIXME more IQ
 872                     s->input_pict_type = B_TYPE;
 873                 else
 874                     s->input_pict_type = P_TYPE;
 875             }
 876         } else {
 877             s->input_pict_type = I_TYPE;
 878         }
 879     }
 880
         /* every I picture restarts the in-GOP counter */
 881     if(s->input_pict_type==I_TYPE)
 882         s->input_picture_in_gop_number=0;
 883
 884     reorder_input(s, pict);
 885
 886     /* output? */
         /* slot 0 is empty while the reorder queue is still filling; nothing is encoded then */
 887     if(s->coded_order[0].picture[0]){
 888
             /* take over the properties stored with the queued picture */
 889         s->pict_type= s->coded_order[0].pict_type;
 890         if (s->fixed_qscale) /* the ratecontrol needs the last qscale so we dont touch it for CBR */
 891             s->qscale= s->coded_order[0].qscale;
 892         s->force_type= s->coded_order[0].force_type;
 893         s->picture_in_gop_number= s->coded_order[0].picture_in_gop_number;
 894         s->picture_number= s->coded_order[0].picture_number;
 895
 896         MPV_frame_start(s, avctx);
 897
 898         encode_picture(s, s->picture_number);
 899
             /* export the statistics of the picture just coded */
 900         avctx->real_pict_num  = s->picture_number;
 901         avctx->header_bits = s->header_bits;
 902         avctx->mv_bits     = s->mv_bits;
 903         avctx->misc_bits   = s->misc_bits;
 904         avctx->i_tex_bits  = s->i_tex_bits;
 905         avctx->p_tex_bits  = s->p_tex_bits;
 906         avctx->i_count     = s->i_count;
 907         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
 908         avctx->skip_count  = s->skip_count;
 909
 910         MPV_frame_end(s);
 911
 912         if (s->out_format == FMT_MJPEG)
 913             mjpeg_picture_trailer(s);
 914
             /* without a fixed qscale, report the qscale that was used back to the caller */
 915         if(!s->fixed_qscale)
 916             avctx->quality = s->qscale;
 917
 918         if(s->flags&CODEC_FLAG_PASS1)
 919             ff_write_pass1_stats(s);
 920
 921     }
 922
         /* the input counters advance for every call, whether or not a picture was output */
 923     s->input_picture_number++;
 924     s->input_picture_in_gop_number++;
 925
 926     flush_put_bits(&s->pb);
 927     s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
 928
 929     s->total_bits += s->frame_bits;
 930     avctx->frame_bits  = s->frame_bits;
 931 //printf("fcode: %d, type: %d, head: %d, mv: %d, misc: %d, frame: %d, itex: %d, ptex: %d\n",
 932 //s->f_code, avctx->key_frame, s->header_bits, s->mv_bits, s->misc_bits, s->frame_bits, s->i_tex_bits, s->p_tex_bits);
 933 #if 0 //dump some stats to stats.txt for testing/debuging
 934 if(s->max_b_frames==0)
 935 {
 936     static FILE *f=NULL;
 937     if(!f) f= fopen("stats.txt", "wb");
 938     get_psnr(pict->data, s->current_picture,
 939              pict->linesize, s->linesize, avctx);
 940     fprintf(f, "%7d, %7d, %2.4f\n", pbBufPtr(&s->pb) - s->pb.buf, s->qscale, avctx->psnr_y);
 941 }
 942 #endif
 943
 944     if (avctx->get_psnr) {
 945         /* At this point pict->data should have the original frame    */
 946         /* and s->current_picture should have the coded/decoded frame */
             /* NOTE(review): with B-frame reordering the picture just coded is
                not necessarily pict - confirm the comparison is still meaningful */
 947         get_psnr(pict->data, s->current_picture,
 948                  pict->linesize, s->linesize, avctx);
 949 //        printf("%f\n", avctx->psnr_y);
 950     }
 951     return pbBufPtr(&s->pb) - s->pb.buf;
 952 }
 953
 954 static inline void gmc1_motion(MpegEncContext *s,
 955                                UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
 956                                int dest_offset,
 957                                UINT8 **ref_picture, int src_offset,
 958                                int h)
 959 {
         /* Motion compensation of the current macroblock from the sprite offsets
          * in s->sprite_offset[] (row [0] for luma, row [1] for both chroma
          * planes); no per-macroblock vector is used.
          *
          * dest_y/dest_cb/dest_cr: destination planes; dest_offset is added to
          *                         the luma pointer and dest_offset>>1 to chroma
          * ref_picture:            the three reference planes; src_offset is
          *                         added to the luma source and src_offset>>1 to
          *                         the chroma source
          * h:                      number of luma rows; chroma uses h>>1
          */
 960     UINT8 *ptr;
 961     int offset, src_x, src_y, linesize, uvlinesize;
 962     int motion_x, motion_y;
 963     int emu=0;
 964
         /* only sprite_offset of the first warping point is used below */
 965     if(s->real_sprite_warping_points>1) printf("more than 1 warp point isnt supported\n");
 966     motion_x= s->sprite_offset[0][0];
 967     motion_y= s->sprite_offset[0][1];
         /* integer part of the offset selects the source position ... */
 968     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
 969     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
         /* ... the offset is rescaled so that its low 4 bits are the fraction handed to gmc1() */
 970     motion_x<<=(3-s->sprite_warping_accuracy);
 971     motion_y<<=(3-s->sprite_warping_accuracy);
         /* clamp the position; at the far border the fraction is dropped */
 972     src_x = clip(src_x, -16, s->width);
 973     if (src_x == s->width)
 974         motion_x =0;
 975     src_y = clip(src_y, -16, s->height);
 976     if (src_y == s->height)
 977         motion_y =0;
 978
 979     linesize = s->linesize;
 980     uvlinesize = s->uvlinesize;
 981     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
 982
 983     dest_y+=dest_offset;
         /* with CODEC_FLAG_EMU_EDGE, a block reaching outside the edge positions
            is first rebuilt in s->edge_emu_buffer; emu remembers this for chroma */
 984     if(s->flags&CODEC_FLAG_EMU_EDGE){
 985         if(src_x<0 || src_y<0 || src_x + (motion_x&15) + 16 > s->h_edge_pos
 986                               || src_y + (motion_y&15) + h  > s->v_edge_pos){
 987             emulated_edge_mc(s, ptr, linesize, 17, h+1, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
 988             ptr= s->edge_emu_buffer;
 989             emu=1;
 990         }
 991     }
         /* luma is done in two calls, 8 columns apart */
 992     gmc1(dest_y  , ptr  , linesize, h, motion_x&15, motion_y&15, s->no_rounding);
 993     gmc1(dest_y+8, ptr+8, linesize, h, motion_x&15, motion_y&15, s->no_rounding);
 994
         /* chroma: same steps with sprite_offset[1], 8-sample macroblock spacing and halved limits */
 995     motion_x= s->sprite_offset[1][0];
 996     motion_y= s->sprite_offset[1][1];
 997     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
 998     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
 999     motion_x<<=(3-s->sprite_warping_accuracy);
1000     motion_y<<=(3-s->sprite_warping_accuracy);
1001     src_x = clip(src_x, -8, s->width>>1);
1002     if (src_x == s->width>>1)
1003         motion_x =0;
1004     src_y = clip(src_y, -8, s->height>>1);
1005     if (src_y == s->height>>1)
1006         motion_y =0;
1007
         /* one offset serves both chroma planes */
1008     offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
1009     ptr = ref_picture[1] + offset;
         /* chroma edge emulation follows the luma decision (emu) */
1010     if(emu){
1011         emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1012         ptr= s->edge_emu_buffer;
1013     }
1014     gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, h>>1, motion_x&15, motion_y&15, s->no_rounding);
1015
1016     ptr = ref_picture[2] + offset;
1017     if(emu){
1018         emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1019         ptr= s->edge_emu_buffer;
1020     }
1021     gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, h>>1, motion_x&15, motion_y&15, s->no_rounding);
1022
1023     return;
1024 }
1025
1026 static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
1027                                     int src_x, int src_y, int w, int h){
         /* Build a block_w x block_h block in s->edge_emu_buffer (stored with
          * stride linesize) for a source block that reaches outside the w x h
          * source area: samples inside the area are copied, all others repeat
          * the nearest copied row or column.
          *
          * src:          top-left sample of the requested block
          * src_x, src_y: position of that sample relative to the source area
          */
1028     int row, col;
1029     int top, left, bottom, right;
1030     UINT8 *const emu = s->edge_emu_buffer;
1031
         /* a block lying completely outside is pulled back until exactly one
            row / column of it overlaps the source area */
1032     if(src_y >= h){
1033         src  += (h - 1 - src_y) * linesize;
1034         src_y = h - 1;
1035     }else if(src_y <= -block_h){
1036         src  += (1 - block_h - src_y) * linesize;
1037         src_y = 1 - block_h;
1038     }
1039     if(src_x >= w){
1040         src  += w - 1 - src_x;
1041         src_x = w - 1;
1042     }else if(src_x <= -block_w){
1043         src  += 1 - block_w - src_x;
1044         src_x = 1 - block_w;
1045     }
1046
         /* [top,bottom) x [left,right): part of the block covered by the source area */
1047     top    = MAX(0, -src_y);
1048     left   = MAX(0, -src_x);
1049     bottom = MIN(block_h, h - src_y);
1050     right  = MIN(block_w, w - src_x);
1051
1052     /* samples that exist in the source */
1053     for(row = top; row < bottom; row++){
             const int line = row * linesize;
1054         for(col = left; col < right; col++)
1055             emu[line + col] = src[line + col];
1057     }
1058
1059     /* rows above: repeat the first copied row */
1060     for(row = 0; row < top; row++){
1061         for(col = left; col < right; col++)
1062             emu[row * linesize + col] = emu[top * linesize + col];
1064     }
1065
1066     /* rows below: repeat the last copied row */
1067     for(row = bottom; row < block_h; row++){
1068         for(col = left; col < right; col++)
1069             emu[row * linesize + col] = emu[(bottom - 1) * linesize + col];
1071     }
1072
         /* finally widen every row to the left and to the right */
1073     for(row = 0; row < block_h; row++){
             UINT8 *const line = emu + row * linesize;
1074
1075         for(col = 0; col < left; col++)
1076             line[col] = line[left];
1078
1080         for(col = right; col < block_w; col++)
1081             line[col] = line[right - 1];
1083     }
1084 }
1085
1086
1087 /* Apply one MPEG motion vector to the three components.
      *
      * Half-sample motion compensation of a 16-wide luma block and the two
      * matching chroma blocks of the current macroblock (s->mb_x, s->mb_y).
      *
      * dest_y/dest_cb/dest_cr: destination planes; dest_offset is added to the
      *                         luma pointer and dest_offset>>1 to chroma
      * ref_picture:            the three reference planes; src_offset is added
      *                         to the luma source, src_offset>>1 to chroma
      * field_based:            when 1, line sizes are doubled and the vertical
      *                         macroblock step, height and edge position halved
      * pix_op:                 pixel functions; row [0] is used for luma and
      *                         row [1] for chroma, indexed by the half-sample
      *                         flags dxy
      * motion_x, motion_y:     luma vector; bit 0 is the half-sample flag
      * h:                      number of luma rows; chroma uses h>>1
      */
1088 static inline void mpeg_motion(MpegEncContext *s,
1089                                UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
1090                                int dest_offset,
1091                                UINT8 **ref_picture, int src_offset,
1092                                int field_based, op_pixels_func (*pix_op)[4],
1093                                int motion_x, int motion_y, int h)
1094 {
1095     UINT8 *ptr;
1096     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1097     int emu=0;
1098 #if 0
1099 if(s->quarter_sample)
1100 {
1101     motion_x>>=1;
1102     motion_y>>=1;
1103 }
1104 #endif
         /* dxy: bit 0 = horizontal half-sample flag, bit 1 = vertical one */
1105     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
1106     src_x = s->mb_x * 16 + (motion_x >> 1);
1107     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
1108
1109     /* WARNING: do not forget half pels */
1110     height = s->height >> field_based;
1111     v_edge_pos = s->v_edge_pos >> field_based;
         /* clamp the position; at the far border the half-sample flag is cleared */
1112     src_x = clip(src_x, -16, s->width);
1113     if (src_x == s->width)
1114         dxy &= ~1;
1115     src_y = clip(src_y, -16, height);
1116     if (src_y == height)
1117         dxy &= ~2;
1118     linesize   = s->linesize << field_based;
1119     uvlinesize = s->uvlinesize << field_based;
1120     ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
1121     dest_y += dest_offset;
1122
         /* with CODEC_FLAG_EMU_EDGE, a block reaching outside the edge positions
            is first rebuilt in s->edge_emu_buffer; emu remembers this for chroma */
1123     if(s->flags&CODEC_FLAG_EMU_EDGE){
1124         if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
1125                               || src_y + (motion_y&1) + h  > v_edge_pos){
1126             emulated_edge_mc(s, ptr, linesize, 17, h+1, src_x, src_y, s->h_edge_pos, v_edge_pos);
1127             ptr= s->edge_emu_buffer;
1128             emu=1;
1129         }
1130     }
1131     pix_op[0][dxy](dest_y, ptr, linesize, h);
1132
         /* luma only when CODEC_FLAG_GRAY is set */
1133     if(s->flags&CODEC_FLAG_GRAY) return;
1134
         /* Chroma vector. H.263 output format: any fractional part of the luma
            vector's quarter value sets the half-sample flag. Otherwise the luma
            vector is halved (division, rounding towards zero) and split into
            flag and integer part. */
1135     if (s->out_format == FMT_H263) {
1136         dxy = 0;
1137         if ((motion_x & 3) != 0)
1138             dxy |= 1;
1139         if ((motion_y & 3) != 0)
1140             dxy |= 2;
1141         mx = motion_x >> 2;
1142         my = motion_y >> 2;
1143     } else {
1144         mx = motion_x / 2;
1145         my = motion_y / 2;
1146         dxy = ((my & 1) << 1) | (mx & 1);
1147         mx >>= 1;
1148         my >>= 1;
1149     }
1150
1151     src_x = s->mb_x * 8 + mx;
1152     src_y = s->mb_y * (8 >> field_based) + my;
1153     src_x = clip(src_x, -8, s->width >> 1);
1154     if (src_x == (s->width >> 1))
1155         dxy &= ~1;
1156     src_y = clip(src_y, -8, height >> 1);
1157     if (src_y == (height >> 1))
1158         dxy &= ~2;
         /* one offset serves both chroma planes */
1159     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1160     ptr = ref_picture[1] + offset;
         /* chroma edge emulation follows the luma decision (emu) */
1161     if(emu){
1162         emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1);
1163         ptr= s->edge_emu_buffer;
1164     }
1165     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1166
1167     ptr = ref_picture[2] + offset;
1168     if(emu){
1169         emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1);
1170         ptr= s->edge_emu_buffer;
1171     }
1172     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1173 }
1174
1175 static inline void qpel_motion(MpegEncContext *s,
1176                                UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
1177                                int dest_offset,
1178                                UINT8 **ref_picture, int src_offset,
1179                                int field_based, op_pixels_func (*pix_op)[4],
1180                                qpel_mc_func (*qpix_op)[16],
1181                                int motion_x, int motion_y, int h)
1182 {
         /* Quarter-sample counterpart of mpeg_motion(): the luma vector carries
          * two fractional bits per component and luma is predicted through
          * qpix_op, while chroma is predicted at half-sample precision through
          * pix_op[1] with a vector derived from the luma vector.
          *
          * dest_*, dest_offset, ref_picture, src_offset, field_based and h are
          * used as in mpeg_motion().
          * qpix_op: luma functions indexed by the 4-bit fraction dxy; row [0] is
          *          used for the frame case, row [1] twice (8 columns apart) for
          *          the field case
          */
1183     UINT8 *ptr;
1184     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1185     int emu=0;
1186
         /* dxy: bits 0-1 = horizontal fraction, bits 2-3 = vertical fraction */
1187     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
1188     src_x = s->mb_x * 16 + (motion_x >> 2);
1189     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
1190
1191     height = s->height >> field_based;
1192     v_edge_pos = s->v_edge_pos >> field_based;
         /* clamp the position; at the far border the fraction is cleared */
1193     src_x = clip(src_x, -16, s->width);
1194     if (src_x == s->width)
1195         dxy &= ~3;
1196     src_y = clip(src_y, -16, height);
1197     if (src_y == height)
1198         dxy &= ~12;
1199     linesize = s->linesize << field_based;
1200     uvlinesize = s->uvlinesize << field_based;
1201     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1202     dest_y += dest_offset;
1203 //printf("%d %d %d\n", src_x, src_y, dxy);
1204
         /* with CODEC_FLAG_EMU_EDGE, a block reaching outside the edge positions
            is first rebuilt in s->edge_emu_buffer; emu remembers this for chroma */
1205     if(s->flags&CODEC_FLAG_EMU_EDGE){
1206         if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
1207                               || src_y + (motion_y&3) + h  > v_edge_pos){
1208             emulated_edge_mc(s, ptr, linesize, 17, h+1, src_x, src_y, s->h_edge_pos, v_edge_pos);
1209             ptr= s->edge_emu_buffer;
1210             emu=1;
1211         }
1212     }
1213     if(!field_based)
1214         qpix_op[0][dxy](dest_y, ptr, linesize);
1215     else{
1216         //damn interlaced mode
1217         //FIXME boundary mirroring is not exactly correct here
1218         qpix_op[1][dxy](dest_y  , ptr  , linesize);
1219         qpix_op[1][dxy](dest_y+8, ptr+8, linesize);
1220     }
1221
         /* luma only when CODEC_FLAG_GRAY is set */
1222     if(s->flags&CODEC_FLAG_GRAY) return;
1223
         /* First halving of the luma vector; the rounding differs per case:
            field based, s->divx_version set, or neither. */
1224     if(field_based){
1225         mx= motion_x/2;
1226         my= motion_y>>1;
1227     }else if(s->divx_version){
1228         mx= (motion_x>>1)|(motion_x&1);
1229         my= (motion_y>>1)|(motion_y&1);
1230     }else{
1231         mx= motion_x/2;
1232         my= motion_y/2;
1233     }
         /* second halving keeps a dropped low bit "sticky", then the vector is
            split into half-sample flags (dxy) and integer part */
1234     mx= (mx>>1)|(mx&1);
1235     my= (my>>1)|(my&1);
1236     dxy= (mx&1) | ((my&1)<<1);
1237     mx>>=1;
1238     my>>=1;
1239
1240     src_x = s->mb_x * 8 + mx;
1241     src_y = s->mb_y * (8 >> field_based) + my;
1242     src_x = clip(src_x, -8, s->width >> 1);
1243     if (src_x == (s->width >> 1))
1244         dxy &= ~1;
1245     src_y = clip(src_y, -8, height >> 1);
1246     if (src_y == (height >> 1))
1247         dxy &= ~2;
1248
         /* one offset serves both chroma planes */
1249     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1250     ptr = ref_picture[1] + offset;
         /* chroma edge emulation follows the luma decision (emu) */
1251     if(emu){
1252         emulated_edge_mc(s, ptr,  uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1);
1253         ptr= s->edge_emu_buffer;
1254     }
1255     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
1256
1257     ptr = ref_picture[2] + offset;
1258     if(emu){
1259         emulated_edge_mc(s, ptr,  uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1);
1260         ptr= s->edge_emu_buffer;
1261     }
1262     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
1263 }
1264
1265
1266 static inline void MPV_motion(MpegEncContext *s,
1267                               UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
1268                               int dir, UINT8 **ref_picture,
1269                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
1270 {
         /* Motion compensation of the current macroblock for one prediction
          * direction, dispatched on s->mv_type.
          *
          * dest_y/dest_cb/dest_cr: destination of the macroblock in each plane
          * dir:                    index into s->mv[] and s->field_select[]
          * ref_picture:            the three reference planes
          * pix_op, qpix_op:        half-sample / quarter-sample pixel functions
          *                         (the caller selects put or avg variants)
          */
1271     int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
1272     int mb_x, mb_y, i;
1273     UINT8 *ptr, *dest;
1274     int emu=0;
1275
1276     mb_x = s->mb_x;
1277     mb_y = s->mb_y;
1278
1279     switch(s->mv_type) {
1280     case MV_TYPE_16X16:
             /* one vector for the whole macroblock: sprite offsets (mcsel),
                quarter-sample or half-sample prediction */
1281         if(s->mcsel){
1282             gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
1283                         ref_picture, 0,
1284                         16);
1285         }else if(s->quarter_sample){
1286             qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
1287                         ref_picture, 0,
1288                         0, pix_op, qpix_op,
1289                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
1290         }else{
1291             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
1292                         ref_picture, 0,
1293                         0, pix_op,
1294                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
1295         }
1296         break;
1297     case MV_TYPE_8X8:
             /* four 8x8 luma blocks with their own vectors; mx/my collect the
                vector sum from which the single chroma vector is derived */
1298         mx = 0;
1299         my = 0;
1300         if(s->quarter_sample){
1301             for(i=0;i<4;i++) {
1302                 motion_x = s->mv[dir][i][0];
1303                 motion_y = s->mv[dir][i][1];
1304
1305                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                     /* block i sits at column (i&1)*8, row (i>>1)*8 of the macroblock */
1306                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
1307                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
1308
1309                 /* WARNING: do not forget half pels */
1310                 src_x = clip(src_x, -16, s->width);
1311                 if (src_x == s->width)
1312                     dxy &= ~3;
1313                 src_y = clip(src_y, -16, s->height);
1314                 if (src_y == s->height)
1315                     dxy &= ~12;
1316
1317                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
1318                 if(s->flags&CODEC_FLAG_EMU_EDGE){
1319                     if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
1320                                           || src_y + (motion_y&3) + 8 > s->v_edge_pos){
1321                         emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1322                         ptr= s->edge_emu_buffer;
1323                     }
1324                 }
1325                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
1326                 qpix_op[1][dxy](dest, ptr, s->linesize);
1327
                     /* quarter-sample vectors are halved before being summed */
1328                 mx += s->mv[dir][i][0]/2;
1329                 my += s->mv[dir][i][1]/2;
1330             }
1331         }else{
1332             for(i=0;i<4;i++) {
1333                 motion_x = s->mv[dir][i][0];
1334                 motion_y = s->mv[dir][i][1];
1335
1336                 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
1337                 src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
1338                 src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
1339
1340                 /* WARNING: do not forget half pels */
1341                 src_x = clip(src_x, -16, s->width);
1342                 if (src_x == s->width)
1343                     dxy &= ~1;
1344                 src_y = clip(src_y, -16, s->height);
1345                 if (src_y == s->height)
1346                     dxy &= ~2;
1347
1348                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
1349                 if(s->flags&CODEC_FLAG_EMU_EDGE){
1350                     if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
1351                                           || src_y + (motion_y&1) + 8 > s->v_edge_pos){
1352                         emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1353                         ptr= s->edge_emu_buffer;
1354                     }
1355                 }
1356                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
1357                 pix_op[1][dxy](dest, ptr, s->linesize, 8);
1358
1359                 mx += s->mv[dir][i][0];
1360                 my += s->mv[dir][i][1];
1361             }
1362         }
1363
             /* luma only when CODEC_FLAG_GRAY is set */
1364         if(s->flags&CODEC_FLAG_GRAY) break;
1365         /* In case of 8X8, we construct a single chroma motion vector
1366            with a special rounding */
             /* NOTE(review): the loop below has an empty body and does nothing */
1367         for(i=0;i<4;i++) {
1368         }
             /* the rounding table is applied to the magnitude, the sign is restored afterwards */
1369         if (mx >= 0)
1370             mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
1371         else {
1372             mx = -mx;
1373             mx = -(h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
1374         }
1375         if (my >= 0)
1376             my = (h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
1377         else {
1378             my = -my;
1379             my = -(h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
1380         }
             /* split into half-sample flags and integer part */
1381         dxy = ((my & 1) << 1) | (mx & 1);
1382         mx >>= 1;
1383         my >>= 1;
1384
1385         src_x = mb_x * 8 + mx;
1386         src_y = mb_y * 8 + my;
1387         src_x = clip(src_x, -8, s->width/2);
1388         if (src_x == s->width/2)
1389             dxy &= ~1;
1390         src_y = clip(src_y, -8, s->height/2);
1391         if (src_y == s->height/2)
1392             dxy &= ~2;
1393
             /* one offset serves both chroma planes */
1394         offset = (src_y * (s->uvlinesize)) + src_x;
1395         ptr = ref_picture[1] + offset;
             /* here the edge test is made on the chroma block itself; emu carries
                the result over to the second chroma plane */
1396         if(s->flags&CODEC_FLAG_EMU_EDGE){
1397                 if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
1398                                       || src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
1399                     emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1400                     ptr= s->edge_emu_buffer;
1401                     emu=1;
1402                 }
1403             }
1404         pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);
1405
1406         ptr = ref_picture[2] + offset;
1407         if(emu){
1408             emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1409             ptr= s->edge_emu_buffer;
1410         }
1411         pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
1412         break;
1413     case MV_TYPE_FIELD:
             /* Frame pictures: each field is predicted separately with 8 rows.
                The destination offset s->linesize selects the bottom field;
                the source offset is s->linesize when field_select is set. */
1414         if (s->picture_structure == PICT_FRAME) {
1415             if(s->quarter_sample){
1416                 /* top field */
1417                 qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
1418                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
1419                             1, pix_op, qpix_op,
1420                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
1421                 /* bottom field */
1422                 qpel_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
1423                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
1424                             1, pix_op, qpix_op,
1425                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
1426             }else{
1427                 /* top field */
1428                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
1429                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
1430                             1, pix_op,
1431                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
1432                 /* bottom field */
1433                 mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
1434                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
1435                             1, pix_op,
1436                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
1437             }
1438         } else {
                 /* any other picture_structure: nothing is done here */
1439
1440
1441         }
1442         break;
1443     }
1444 }
1445
1446
1447 /* Store the inverse DCT of block[] in dest[]; unless s->mpeg2 is set the block is dequantized with the current qscale first */
1448 static inline void put_dct(MpegEncContext *s,
1449                            DCTELEM *block, int i, UINT8 *dest, int line_size)
1450 {
         const int needs_dequant = !s->mpeg2;

1451     if (needs_dequant) {
1452         s->dct_unquantize(s, block, i, s->qscale);
         }
1453     ff_idct_put(dest, line_size, block);
1454 }
1455
1456 /* Add the inverse DCT of block[] to dest[]; block i is left out when its block_last_index is negative */
1457 static inline void add_dct(MpegEncContext *s,
1458                            DCTELEM *block, int i, UINT8 *dest, int line_size)
1459 {
1460     if (s->block_last_index[i] < 0)
1461         return;
1462     ff_idct_add(dest, line_size, block);
1463 }
1464
1465 static inline void add_dequant_dct(MpegEncContext *s,
1466                            DCTELEM *block, int i, UINT8 *dest, int line_size)
1467 {
         /* Dequantize block[] with the current qscale and add its inverse DCT
            to dest[]; block i is left out when its block_last_index is negative */
1468     if (s->block_last_index[i] < 0)
1469         return;
1470
1471     s->dct_unquantize(s, block, i, s->qscale);
1472     ff_idct_add(dest, line_size, block);
1473 }
1474
1475 /**
1476  * Resets the intra prediction state (dc, ac and, for msmpeg4 version >= 3,
      * coded_block) of the current non intra MB and clears its mbintra_table flag.
1477  */
1478 void ff_clean_intra_table_entries(MpegEncContext *s)
1479 {
1480     const int luma_wrap  = s->block_wrap[0];
1481     const int luma_xy    = s->block_index[0];
         const int luma_below = luma_xy + luma_wrap;
         int chroma_wrap, chroma_xy;
1482
         /* DC predictors of the 2x2 luma entries */
1483     s->dc_val[0][luma_xy       ] = 1024;
1484     s->dc_val[0][luma_xy + 1   ] = 1024;
1485     s->dc_val[0][luma_below    ] = 1024;
1486     s->dc_val[0][luma_below + 1] = 1024;
1487     /* AC prediction: 32 values starting at each of the two luma rows */
1488     memset(s->ac_val[0][luma_xy   ], 0, 32 * sizeof(INT16));
1489     memset(s->ac_val[0][luma_below], 0, 32 * sizeof(INT16));
1490     if (s->msmpeg4_version >= 3) {
1491         s->coded_block[luma_xy       ] = 0;
1492         s->coded_block[luma_xy + 1   ] = 0;
1493         s->coded_block[luma_below    ] = 0;
1494         s->coded_block[luma_below + 1] = 0;
1495     }
1496     /* chroma: one entry per plane, addressed with the chroma wrap */
1497     chroma_wrap = s->block_wrap[4];
1498     chroma_xy   = s->mb_x + 1 + (s->mb_y + 1) * chroma_wrap;
1499     s->dc_val[1][chroma_xy] = 1024;
1500     s->dc_val[2][chroma_xy] = 1024;
1501     /* AC prediction: 16 values per chroma plane */
1502     memset(s->ac_val[1][chroma_xy], 0, 16 * sizeof(INT16));
1503     memset(s->ac_val[2][chroma_xy], 0, 16 * sizeof(INT16));
1504
         /* the entries of this macroblock are clean now */
1505     s->mbintra_table[s->mb_x + s->mb_y * s->mb_width] = 0;
1506 }
1507
1508 /* generic function called after a macroblock has been parsed by the
1509    decoder or after it has been encoded by the encoder.
1510
1511    Important variables used:
1512    s->mb_intra : true if intra macroblock
1513    s->mv_dir   : motion vector direction
1514    s->mv_type  : motion vector type
1515    s->mv       : motion vector
1516    s->interlaced_dct : true if interlaced dct used (mpeg2)
1517  */
1518 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
1519 {
1520     int mb_x, mb_y;
1521     const int mb_xy = s->mb_y * s->mb_width + s->mb_x;
1522
1523     mb_x = s->mb_x;
1524     mb_y = s->mb_y;
1525
1526 #ifdef FF_POSTPROCESS
1527     /* Obsolete. Exists for compatibility with mplayer only. */
1528     quant_store[mb_y][mb_x]=s->qscale;
1529     //printf("[%02d][%02d] %d\n",mb_x,mb_y,s->qscale);
1530 #else
1531     /* even more obsolete, exists for mplayer xp only */
1532     if(s->avctx->quant_store) s->avctx->quant_store[mb_y*s->avctx->qstride+mb_x] = s->qscale;
1533 #endif
1534     s->qscale_table[mb_xy]= s->qscale;
1535
1536     /* update DC predictors for P macroblocks */
1537     if (!s->mb_intra) {
1538         if (s->h263_pred || s->h263_aic) {
1539             if(s->mbintra_table[mb_xy])
1540                 ff_clean_intra_table_entries(s);
1541         } else {
1542             s->last_dc[0] =
1543             s->last_dc[1] =
1544             s->last_dc[2] = 128 << s->intra_dc_precision;
1545         }
1546     }
1547     else if (s->h263_pred || s->h263_aic)
1548         s->mbintra_table[mb_xy]=1;
1549
1550     /* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */
1551     if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) { //FIXME move into h263.c if possible, format specific stuff shouldnt be here
1552
1553         const int wrap = s->block_wrap[0];
1554         const int xy = s->block_index[0];
1555         const int mb_index= s->mb_x + s->mb_y*s->mb_width;
1556         if(s->mv_type == MV_TYPE_8X8){
1557             s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_4MV;
1558         } else {
1559             int motion_x, motion_y;
1560             if (s->mb_intra) {
1561                 motion_x = 0;
1562                 motion_y = 0;
1563                 if(s->co_located_type_table)
1564                     s->co_located_type_table[mb_index]= 0;
1565             } else if (s->mv_type == MV_TYPE_16X16) {
1566                 motion_x = s->mv[0][0][0];
1567                 motion_y = s->mv[0][0][1];
1568                 if(s->co_located_type_table)
1569                     s->co_located_type_table[mb_index]= 0;
1570             } else /*if (s->mv_type == MV_TYPE_FIELD)*/ {
1571                 int i;
1572                 motion_x = s->mv[0][0][0] + s->mv[0][1][0];
1573                 motion_y = s->mv[0][0][1] + s->mv[0][1][1];
1574                 motion_x = (motion_x>>1) | (motion_x&1);
1575                 for(i=0; i<2; i++){
1576                     s->field_mv_table[mb_index][i][0]= s->mv[0][i][0];
1577                     s->field_mv_table[mb_index][i][1]= s->mv[0][i][1];
1578                     s->field_select_table[mb_index][i]= s->field_select[0][i];
1579                 }
1580                 s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_FIELDMV;
1581             }
1582             /* no update if 8X8 because it has been done during parsing */
1583             s->motion_val[xy][0] = motion_x;
1584             s->motion_val[xy][1] = motion_y;
1585             s->motion_val[xy + 1][0] = motion_x;
1586             s->motion_val[xy + 1][1] = motion_y;
1587             s->motion_val[xy + wrap][0] = motion_x;
1588             s->motion_val[xy + wrap][1] = motion_y;
1589             s->motion_val[xy + 1 + wrap][0] = motion_x;
1590             s->motion_val[xy + 1 + wrap][1] = motion_y;
1591         }
1592     }
1593
1594     if (!(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) {
1595         UINT8 *dest_y, *dest_cb, *dest_cr;
1596         int dct_linesize, dct_offset;
1597         op_pixels_func (*op_pix)[4];
1598         qpel_mc_func (*op_qpix)[16];
1599
1600         /* avoid copy if macroblock skipped in last frame too
1601            dont touch it for B-frames as they need the skip info from the next p-frame */
1602         if (s->pict_type != B_TYPE) {
1603             UINT8 *mbskip_ptr = &s->mbskip_table[mb_xy];
1604             if (s->mb_skiped) {
1605                 s->mb_skiped = 0;
1606
1607                 (*mbskip_ptr) ++; /* indicate that this time we skiped it */
1608                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
1609
1610                 /* if previous was skipped too, then nothing to do !
1611                    skip only during decoding as we might trash the buffers during encoding a bit */
1612                 if (*mbskip_ptr >= s->ip_buffer_count  && !s->encoding)
1613                     goto the_end;
1614             } else {
1615                 *mbskip_ptr = 0; /* not skipped */
1616             }
1617         }
1618
1619         dest_y = s->current_picture [0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
1620         dest_cb = s->current_picture[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
1621         dest_cr = s->current_picture[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
1622
1623         if (s->interlaced_dct) {
1624             dct_linesize = s->linesize * 2;
1625             dct_offset = s->linesize;
1626         } else {
1627             dct_linesize = s->linesize;
1628             dct_offset = s->linesize * 8;
1629         }
1630
1631         if (!s->mb_intra) {
1632             /* motion handling */
1633             /* decoding or more than one mb_type (MC was allready done otherwise) */
1634             if((!s->encoding) || (s->mb_type[mb_xy]&(s->mb_type[mb_xy]-1))){
1635                 if ((!s->no_rounding) || s->pict_type==B_TYPE){
1636                     op_pix = put_pixels_tab;
1637                     op_qpix= put_qpel_pixels_tab;
1638                 }else{
1639                     op_pix = put_no_rnd_pixels_tab;
1640                     op_qpix= put_no_rnd_qpel_pixels_tab;
1641                 }
1642
1643                 if (s->mv_dir & MV_DIR_FORWARD) {
1644                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
1645                     op_pix = avg_pixels_tab;
1646                     op_qpix= avg_qpel_pixels_tab;
1647                 }
1648                 if (s->mv_dir & MV_DIR_BACKWARD) {
1649                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
1650                 }
1651             }
1652
1653             /* skip dequant / idct if we are really late ;) */
1654             if(s->hurry_up>1) goto the_end;
1655
1656             /* add dct residue */
1657             if(s->encoding || !(s->mpeg2 || s->h263_msmpeg4 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
1658                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize);
1659                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize);
1660                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
1661                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
1662
1663                 if(!(s->flags&CODEC_FLAG_GRAY)){
1664                     add_dequant_dct(s, block[4], 4, dest_cb, s->uvlinesize);
1665                     add_dequant_dct(s, block[5], 5, dest_cr, s->uvlinesize);
1666                 }
1667             } else {
1668                 add_dct(s, block[0], 0, dest_y, dct_linesize);
1669                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
1670                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
1671                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
1672
1673                 if(!(s->flags&CODEC_FLAG_GRAY)){
1674                     add_dct(s, block[4], 4, dest_cb, s->uvlinesize);
1675                     add_dct(s, block[5], 5, dest_cr, s->uvlinesize);
1676                 }
1677             }
1678         } else {
1679             /* dct only in intra block */
1680             put_dct(s, block[0], 0, dest_y, dct_linesize);
1681             put_dct(s, block[1], 1, dest_y + 8, dct_linesize);
1682             put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
1683             put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
1684
1685             if(!(s->flags&CODEC_FLAG_GRAY)){
1686                 put_dct(s, block[4], 4, dest_cb, s->uvlinesize);
1687                 put_dct(s, block[5], 5, dest_cr, s->uvlinesize);
1688             }
1689         }
1690     }
1691  the_end:
1692     emms_c(); //FIXME remove
1693 }
1694
1695 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
1696 {
1697     static const char tab[64]=
1698         {3,2,2,1,1,1,1,1,
1699          1,1,1,1,1,1,1,1,
1700          1,1,1,1,1,1,1,1,
1701          0,0,0,0,0,0,0,0,
1702          0,0,0,0,0,0,0,0,
1703          0,0,0,0,0,0,0,0,
1704          0,0,0,0,0,0,0,0,
1705          0,0,0,0,0,0,0,0};
1706     int score=0;
1707     int run=0;
1708     int i;
1709     DCTELEM *block= s->block[n];
1710     const int last_index= s->block_last_index[n];
1711     int skip_dc;
1712
1713     if(threshold<0){
1714         skip_dc=0;
1715         threshold= -threshold;
1716     }else
1717         skip_dc=1;
1718
1719     /* are all which we could set to zero are allready zero? */
1720     if(last_index<=skip_dc - 1) return;
1721
1722     for(i=0; i<=last_index; i++){
1723         const int j = zigzag_direct[i];
1724         const int level = ABS(block[j]);
1725         if(level==1){
1726             if(skip_dc && i==0) continue;
1727             score+= tab[run];
1728             run=0;
1729         }else if(level>1){
1730             return;
1731         }else{
1732             run++;
1733         }
1734     }
1735     if(score >= threshold) return;
1736     for(i=skip_dc; i<=last_index; i++){
1737         const int j = zigzag_direct[i];
1738         block[j]=0;
1739     }
1740     if(block[0]) s->block_last_index[n]= 0;
1741     else         s->block_last_index[n]= -1;
1742 }
1743
1744 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
1745 {
1746     int i;
1747     const int maxlevel= s->max_qcoeff;
1748     const int minlevel= s->min_qcoeff;
1749
1750     for(i=0;i<=last_index; i++){
1751         const int j = zigzag_direct[i];
1752         int level = block[j];
1753
1754         if     (level>maxlevel) level=maxlevel;
1755         else if(level<minlevel) level=minlevel;
1756         block[j]= level;
1757     }
1758 }
1759
1760 static inline void requantize_coeffs(MpegEncContext *s, DCTELEM block[64], int oldq, int newq, int n)
1761 {
1762     int i;
1763
1764     if(s->mb_intra){
1765         //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...)
1766         i=1;
1767     }else
1768         i=0;
1769
1770     for(;i<=s->block_last_index[n]; i++){
1771         const int j = zigzag_direct[i];
1772         int level = block[j];
1773
1774         block[j]= ROUNDED_DIV(level*oldq, newq);
1775     }
1776
1777     for(i=s->block_last_index[n]; i>=0; i--){
1778         const int j = zigzag_direct[i]; //FIXME other scantabs
1779         if(block[j]) break;
1780     }
1781     s->block_last_index[n]= i;
1782 }
1783
1784 static inline void auto_requantize_coeffs(MpegEncContext *s, DCTELEM block[6][64])
1785 {
1786     int i,n, newq;
1787     const int maxlevel= s->max_qcoeff;
1788     const int minlevel= s->min_qcoeff;
1789     int largest=0, smallest=0;
1790
1791     assert(s->adaptive_quant);
1792
1793     for(n=0; n<6; n++){
1794         if(s->mb_intra) i=1;
1795         else            i=0;
1796
1797         for(;i<=s->block_last_index[n]; i++){
1798             const int j = zigzag_direct[i]; //FIXME other scantabs
1799             int level = block[n][j];
1800             if(largest  < level) largest = level;
1801             if(smallest > level) smallest= level;
1802         }
1803     }
1804
1805     for(newq=s->qscale+1; newq<32; newq++){
1806         if(   ROUNDED_DIV(smallest*s->qscale, newq) >= minlevel
1807            && ROUNDED_DIV(largest *s->qscale, newq) <= maxlevel)
1808             break;
1809     }
1810
1811     if(s->out_format==FMT_H263){
1812         /* h263 like formats cannot change qscale by more than 2 easiely */
1813         if(s->avctx->qmin + 2 < newq)
1814             newq= s->avctx->qmin + 2;
1815     }
1816
1817     for(n=0; n<6; n++){
1818         requantize_coeffs(s, block[n], s->qscale, newq, n);
1819         clip_coeffs(s, block[n], s->block_last_index[n]);
1820     }
1821
1822     s->dquant+= newq - s->qscale;
1823     s->qscale= newq;
1824 }
1825 #if 0
1826 static int pix_vcmp16x8(UINT8 *s, int stride){ //FIXME move to dsputil & optimize
1827     int score=0;
1828     int x,y;
1829
1830     for(y=0; y<7; y++){
1831         for(x=0; x<16; x+=4){
1832             score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride])
1833                    +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
1834         }
1835         s+= stride;
1836     }
1837
1838     return score;
1839 }
1840
1841 static int pix_diff_vcmp16x8(UINT8 *s1, UINT8*s2, int stride){ //FIXME move to dsputil & optimize
1842     int score=0;
1843     int x,y;
1844
1845     for(y=0; y<7; y++){
1846         for(x=0; x<16; x++){
1847             score+= ABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
1848         }
1849         s1+= stride;
1850         s2+= stride;
1851     }
1852
1853     return score;
1854 }
1855 #else
1856 #define SQ(a) ((a)*(a))
1857
1858 static int pix_vcmp16x8(UINT8 *s, int stride){ //FIXME move to dsputil & optimize
1859     int score=0;
1860     int x,y;
1861
1862     for(y=0; y<7; y++){
1863         for(x=0; x<16; x+=4){
1864             score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])
1865                    +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
1866         }
1867         s+= stride;
1868     }
1869
1870     return score;
1871 }
1872
1873 static int pix_diff_vcmp16x8(UINT8 *s1, UINT8*s2, int stride){ //FIXME move to dsputil & optimize
1874     int score=0;
1875     int x,y;
1876
1877     for(y=0; y<7; y++){
1878         for(x=0; x<16; x++){
1879             score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
1880         }
1881         s1+= stride;
1882         s2+= stride;
1883     }
1884
1885     return score;
1886 }
1887
1888 #endif
1889 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
1890 {
1891     const int mb_x= s->mb_x;
1892     const int mb_y= s->mb_y;
1893     int i;
1894     int skip_dct[6];
1895     int dct_offset   = s->linesize*8; //default for progressive frames
1896
1897     for(i=0; i<6; i++) skip_dct[i]=0;
1898
1899     if(s->adaptive_quant){
1900         s->dquant= s->qscale_table[mb_x + mb_y*s->mb_width] - s->qscale;
1901
1902         if(s->out_format==FMT_H263){
1903             if     (s->dquant> 2) s->dquant= 2;
1904             else if(s->dquant<-2) s->dquant=-2;
1905         }
1906
1907         if(s->codec_id==CODEC_ID_MPEG4){
1908             if(!s->mb_intra){
1909                 assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
1910
1911                 if(s->mv_dir&MV_DIRECT)
1912                     s->dquant=0;
1913             }
1914         }
1915         s->qscale+= s->dquant;
1916         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
1917         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
1918     }
1919
1920     if (s->mb_intra) {
1921         UINT8 *ptr;
1922         int wrap_y;
1923         int emu=0;
1924
1925         wrap_y = s->linesize;
1926         ptr = s->new_picture[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
1927
1928         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
1929             emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
1930             ptr= s->edge_emu_buffer;
1931             emu=1;
1932         }
1933
1934         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
1935             int progressive_score, interlaced_score;
1936
1937             progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
1938             interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
1939
1940             if(progressive_score > interlaced_score + 100){
1941                 s->interlaced_dct=1;
1942
1943                 dct_offset= wrap_y;
1944                 wrap_y<<=1;
1945             }else
1946                 s->interlaced_dct=0;
1947         }
1948
1949         get_pixels(s->block[0], ptr                 , wrap_y);
1950         get_pixels(s->block[1], ptr              + 8, wrap_y);
1951         get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
1952         get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
1953
1954         if(s->flags&CODEC_FLAG_GRAY){
1955             skip_dct[4]= 1;
1956             skip_dct[5]= 1;
1957         }else{
1958             int wrap_c = s->uvlinesize;
1959             ptr = s->new_picture[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
1960             if(emu){
1961                 emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
1962                 ptr= s->edge_emu_buffer;
1963             }
1964             get_pixels(s->block[4], ptr, wrap_c);
1965
1966             ptr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
1967             if(emu){
1968                 emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
1969                 ptr= s->edge_emu_buffer;
1970             }
1971             get_pixels(s->block[5], ptr, wrap_c);
1972         }
1973     }else{
1974         op_pixels_func (*op_pix)[4];
1975         qpel_mc_func (*op_qpix)[16];
1976         UINT8 *dest_y, *dest_cb, *dest_cr;
1977         UINT8 *ptr_y, *ptr_cb, *ptr_cr;
1978         int wrap_y, wrap_c;
1979         int emu=0;
1980
1981         dest_y  = s->current_picture[0] + (mb_y * 16 * s->linesize       ) + mb_x * 16;
1982         dest_cb = s->current_picture[1] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
1983         dest_cr = s->current_picture[2] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
1984         wrap_y = s->linesize;
1985         wrap_c = s->uvlinesize;
1986         ptr_y  = s->new_picture[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
1987         ptr_cb = s->new_picture[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
1988         ptr_cr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
1989
1990         if ((!s->no_rounding) || s->pict_type==B_TYPE){
1991             op_pix = put_pixels_tab;
1992             op_qpix= put_qpel_pixels_tab;
1993         }else{
1994             op_pix = put_no_rnd_pixels_tab;
1995             op_qpix= put_no_rnd_qpel_pixels_tab;
1996         }
1997
1998         if (s->mv_dir & MV_DIR_FORWARD) {
1999             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
2000             op_pix = avg_pixels_tab;
2001             op_qpix= avg_qpel_pixels_tab;
2002         }
2003         if (s->mv_dir & MV_DIR_BACKWARD) {
2004             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
2005         }
2006
2007         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2008             emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2009             ptr_y= s->edge_emu_buffer;
2010             emu=1;
2011         }
2012
2013         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2014             int progressive_score, interlaced_score;
2015
2016             progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  )
2017                              + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
2018             interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
2019                              + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
2020
2021             if(progressive_score > interlaced_score + 600){
2022                 s->interlaced_dct=1;
2023
2024                 dct_offset= wrap_y;
2025                 wrap_y<<=1;
2026             }else
2027                 s->interlaced_dct=0;
2028         }
2029
2030         diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
2031         diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
2032         diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
2033         diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
2034
2035         if(s->flags&CODEC_FLAG_GRAY){
2036             skip_dct[4]= 1;
2037             skip_dct[5]= 1;
2038         }else{
2039             if(emu){
2040                 emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2041                 ptr_cb= s->edge_emu_buffer;
2042             }
2043             diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2044             if(emu){
2045                 emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2046                 ptr_cr= s->edge_emu_buffer;
2047             }
2048             diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2049         }
2050
2051         /* pre quantization */
2052         if(s->mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){
2053             //FIXME optimize
2054             if(pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
2055             if(pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
2056             if(pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
2057             if(pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
2058             if(pix_abs8x8(ptr_cb              , dest_cb              , wrap_y) < 20*s->qscale) skip_dct[4]= 1;
2059             if(pix_abs8x8(ptr_cr              , dest_cr              , wrap_y) < 20*s->qscale) skip_dct[5]= 1;
2060 #if 0
2061 {
2062  static int stat[7];
2063  int num=0;
2064  for(i=0; i<6; i++)
2065   if(skip_dct[i]) num++;
2066  stat[num]++;
2067
2068  if(s->mb_x==0 && s->mb_y==0){
2069   for(i=0; i<7; i++){
2070    printf("%6d %1d\n", stat[i], i);
2071   }
2072  }
2073 }
2074 #endif
2075         }
2076
2077     }
2078
2079 #if 0
2080             {
2081                 float adap_parm;
2082
2083                 adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) /
2084                             ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
2085
2086                 printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d",
2087                         (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P',
2088                         s->qscale, adap_parm, s->qscale*adap_parm,
2089                         s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var);
2090             }
2091 #endif
2092     /* DCT & quantize */
2093     if(s->out_format==FMT_MJPEG){
2094         for(i=0;i<6;i++) {
2095             int overflow;
2096             s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, 8, &overflow);
2097             if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
2098         }
2099     }else{
2100         for(i=0;i<6;i++) {
2101             if(!skip_dct[i]){
2102                 int overflow;
2103                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2104             // FIXME we could decide to change to quantizer instead of clipping
2105             // JS: I don't think that would be a good idea it could lower quality instead
2106             //     of improve it. Just INTRADC clipping deserves changes in quantizer
2107                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
2108             }else
2109                 s->block_last_index[i]= -1;
2110         }
2111         if(s->luma_elim_threshold && !s->mb_intra)
2112             for(i=0; i<4; i++)
2113                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2114         if(s->chroma_elim_threshold && !s->mb_intra)
2115             for(i=4; i<6; i++)
2116                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2117     }
2118
2119     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
2120         s->block_last_index[4]=
2121         s->block_last_index[5]= 0;
2122         s->block[4][0]=
2123         s->block[5][0]= 128;
2124     }
2125
2126     /* huffman encode */
2127     switch(s->out_format) {
2128     case FMT_MPEG1:
2129         mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2130         break;
2131     case FMT_H263:
2132         if (s->h263_msmpeg4)
2133             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2134         else if(s->h263_pred)
2135             mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2136         else
2137             h263_encode_mb(s, s->block, motion_x, motion_y);
2138         break;
2139     case FMT_MJPEG:
2140         mjpeg_encode_mb(s, s->block);
2141         break;
2142     }
2143 }
2144
2145 void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length)
2146 {
2147     int bytes= length>>4;
2148     int bits= length&15;
2149     int i;
2150
2151     if(length==0) return;
2152
2153     for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i]));
2154     put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits));
2155 }
2156
2157 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2158     int i;
2159
2160     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
2161
2162     /* mpeg1 */
2163     d->mb_incr= s->mb_incr;
2164     for(i=0; i<3; i++)
2165         d->last_dc[i]= s->last_dc[i];
2166
2167     /* statistics */
2168     d->mv_bits= s->mv_bits;
2169     d->i_tex_bits= s->i_tex_bits;
2170     d->p_tex_bits= s->p_tex_bits;
2171     d->i_count= s->i_count;
2172     d->f_count= s->f_count;
2173     d->b_count= s->b_count;
2174     d->skip_count= s->skip_count;
2175     d->misc_bits= s->misc_bits;
2176     d->last_bits= 0;
2177
2178     d->mb_skiped= s->mb_skiped;
2179 }
2180
2181 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2182     int i;
2183
2184     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2185     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
2186
2187     /* mpeg1 */
2188     d->mb_incr= s->mb_incr;
2189     for(i=0; i<3; i++)
2190         d->last_dc[i]= s->last_dc[i];
2191
2192     /* statistics */
2193     d->mv_bits= s->mv_bits;
2194     d->i_tex_bits= s->i_tex_bits;
2195     d->p_tex_bits= s->p_tex_bits;
2196     d->i_count= s->i_count;
2197     d->f_count= s->f_count;
2198     d->b_count= s->b_count;
2199     d->skip_count= s->skip_count;
2200     d->misc_bits= s->misc_bits;
2201
2202     d->mb_intra= s->mb_intra;
2203     d->mb_skiped= s->mb_skiped;
2204     d->mv_type= s->mv_type;
2205     d->mv_dir= s->mv_dir;
2206     d->pb= s->pb;
2207     if(s->data_partitioning){
2208         d->pb2= s->pb2;
2209         d->tex_pb= s->tex_pb;
2210     }
2211     d->block= s->block;
2212     for(i=0; i<6; i++)
2213         d->block_last_index[i]= s->block_last_index[i];
2214 }
2215
2216 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2217                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2218                            int *dmin, int *next_block, int motion_x, int motion_y)
2219 {
2220     int bits_count;
2221
2222     copy_context_before_encode(s, backup, type);
2223
2224     s->block= s->blocks[*next_block];
2225     s->pb= pb[*next_block];
2226     if(s->data_partitioning){
2227         s->pb2   = pb2   [*next_block];
2228         s->tex_pb= tex_pb[*next_block];
2229     }
2230
2231     encode_mb(s, motion_x, motion_y);
2232
2233     bits_count= get_bit_count(&s->pb);
2234     if(s->data_partitioning){
2235         bits_count+= get_bit_count(&s->pb2);
2236         bits_count+= get_bit_count(&s->tex_pb);
2237     }
2238
2239     if(bits_count<*dmin){
2240         *dmin= bits_count;
2241         *next_block^=1;
2242
2243         copy_context_after_encode(best, s, type);
2244     }
2245 }
2246
2247 static void encode_picture(MpegEncContext *s, int picture_number)
2248 {
2249     int mb_x, mb_y, last_gob, pdif = 0;
2250     int i;
2251     int bits;
2252     MpegEncContext best_s, backup_s;
2253     UINT8 bit_buf[2][3000];
2254     UINT8 bit_buf2[2][3000];
2255     UINT8 bit_buf_tex[2][3000];
2256     PutBitContext pb[2], pb2[2], tex_pb[2];
2257
2258     for(i=0; i<2; i++){
2259         init_put_bits(&pb    [i], bit_buf    [i], 3000, NULL, NULL);
2260         init_put_bits(&pb2   [i], bit_buf2   [i], 3000, NULL, NULL);
2261         init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000, NULL, NULL);
2262     }
2263
2264     s->picture_number = picture_number;
2265
2266     s->block_wrap[0]=
2267     s->block_wrap[1]=
2268     s->block_wrap[2]=
2269     s->block_wrap[3]= s->mb_width*2 + 2;
2270     s->block_wrap[4]=
2271     s->block_wrap[5]= s->mb_width + 2;
2272
2273     /* Reset the average MB variance */
2274     s->mb_var_sum = 0;
2275     s->mc_mb_var_sum = 0;
2276
2277     /* we need to initialize some time vars before we can encode b-frames */
2278     if (s->h263_pred && !s->h263_msmpeg4)
2279         ff_set_mpeg4_time(s, s->picture_number);
2280
2281     s->scene_change_score=0;
2282
2283     s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration
2284
2285     /* Estimate motion for every MB */
2286     if(s->pict_type != I_TYPE){
2287         for(mb_y=0; mb_y < s->mb_height; mb_y++) {
2288             s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
2289             s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
2290             s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
2291             s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
2292             for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2293                 s->mb_x = mb_x;
2294                 s->mb_y = mb_y;
2295                 s->block_index[0]+=2;
2296                 s->block_index[1]+=2;
2297                 s->block_index[2]+=2;
2298                 s->block_index[3]+=2;
2299
2300                 /* compute motion vector & mb_type and store in context */
2301                 if(s->pict_type==B_TYPE)
2302                     ff_estimate_b_frame_motion(s, mb_x, mb_y);
2303                 else
2304                     ff_estimate_p_frame_motion(s, mb_x, mb_y);
2305 //                s->mb_type[mb_y*s->mb_width + mb_x]=MB_TYPE_INTER;
2306             }
2307         }
2308         emms_c();
2309     }else /* if(s->pict_type == I_TYPE) */{
2310         /* I-Frame */
2311         //FIXME do we need to zero them?
2312         memset(s->motion_val[0], 0, sizeof(INT16)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
2313         memset(s->p_mv_table   , 0, sizeof(INT16)*(s->mb_width+2)*(s->mb_height+2)*2);
2314         memset(s->mb_type      , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
2315
2316         if(!s->fixed_qscale){
2317             /* finding spatial complexity for I-frame rate control */
2318             for(mb_y=0; mb_y < s->mb_height; mb_y++) {
2319                 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2320                     int xx = mb_x * 16;
2321                     int yy = mb_y * 16;
2322                     uint8_t *pix = s->new_picture[0] + (yy * s->linesize) + xx;
2323                     int varc;
2324                     int sum = pix_sum(pix, s->linesize);
2325
2326                     sum= (sum+8)>>4;
2327                     varc = (pix_norm1(pix, s->linesize) - sum*sum + 500 + 128)>>8;
2328
2329                     s->mb_var [s->mb_width * mb_y + mb_x] = varc;
2330                     s->mb_mean[s->mb_width * mb_y + mb_x] = (sum+7)>>4;
2331                     s->mb_var_sum    += varc;
2332                 }
2333             }
2334         }
2335     }
2336     if(s->scene_change_score > 0 && s->pict_type == P_TYPE){
2337         s->pict_type= I_TYPE;
2338         memset(s->mb_type   , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
2339         if(s->max_b_frames==0){
2340             s->input_pict_type= I_TYPE;
2341             s->input_picture_in_gop_number=0;
2342         }
2343 //printf("Scene change detected, encoding as I Frame %d %d\n", s->mb_var_sum, s->mc_mb_var_sum);
2344     }
2345
2346     if(s->pict_type==P_TYPE || s->pict_type==S_TYPE)
2347         s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
2348         ff_fix_long_p_mvs(s);
2349     if(s->pict_type==B_TYPE){
2350         s->f_code= ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
2351         s->b_code= ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
2352
2353         ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
2354         ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
2355         ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
2356         ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
2357     }
2358
2359     if (s->fixed_qscale)
2360         s->frame_qscale = s->avctx->quality;
2361     else
2362         s->frame_qscale = ff_rate_estimate_qscale(s);
2363
2364     if(s->adaptive_quant){
2365         switch(s->codec_id){
2366         case CODEC_ID_MPEG4:
2367             ff_clean_mpeg4_qscales(s);
2368             break;
2369         case CODEC_ID_H263:
2370         case CODEC_ID_H263P:
2371             ff_clean_h263_qscales(s);
2372             break;
2373         }
2374
2375         s->qscale= s->qscale_table[0];
2376     }else
2377         s->qscale= (int)(s->frame_qscale + 0.5);
2378
2379     if (s->out_format == FMT_MJPEG) {
2380         /* for mjpeg, we do include qscale in the matrix */
2381         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
2382         for(i=1;i<64;i++)
2383             s->intra_matrix[i] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
2384         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
2385                        s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias);
2386     }
2387
2388     s->last_bits= get_bit_count(&s->pb);
2389     switch(s->out_format) {
2390     case FMT_MJPEG:
2391         mjpeg_picture_header(s);
2392         break;
2393     case FMT_H263:
2394         if (s->h263_msmpeg4)
2395             msmpeg4_encode_picture_header(s, picture_number);
2396         else if (s->h263_pred)
2397             mpeg4_encode_picture_header(s, picture_number);
2398         else if (s->h263_rv10)
2399             rv10_encode_picture_header(s, picture_number);
2400         else
2401             h263_encode_picture_header(s, picture_number);
2402         break;
2403     case FMT_MPEG1:
2404         mpeg1_encode_picture_header(s, picture_number);
2405         break;
2406     }
2407     bits= get_bit_count(&s->pb);
2408     s->header_bits= bits - s->last_bits;
2409     s->last_bits= bits;
2410     s->mv_bits=0;
2411     s->misc_bits=0;
2412     s->i_tex_bits=0;
2413     s->p_tex_bits=0;
2414     s->i_count=0;
2415     s->f_count=0;
2416     s->b_count=0;
2417     s->skip_count=0;
2418
2419     /* init last dc values */
2420     /* note: quant matrix value (8) is implied here */
2421     s->last_dc[0] = 128;
2422     s->last_dc[1] = 128;
2423     s->last_dc[2] = 128;
2424     s->mb_incr = 1;
2425     s->last_mv[0][0][0] = 0;
2426     s->last_mv[0][0][1] = 0;
2427
2428     /* Get the GOB height based on picture height */
2429     if (s->out_format == FMT_H263 && !s->h263_pred && !s->h263_msmpeg4) {
2430         if (s->height <= 400)
2431             s->gob_index = 1;
2432         else if (s->height <= 800)
2433             s->gob_index = 2;
2434         else
2435             s->gob_index = 4;
2436     }else if(s->codec_id==CODEC_ID_MPEG4){
2437         s->gob_index = 1;
2438     }
2439
2440     if(s->codec_id==CODEC_ID_MPEG4 && s->data_partitioning && s->pict_type!=B_TYPE)
2441         ff_mpeg4_init_partitions(s);
2442
2443     s->resync_mb_x=0;
2444     s->resync_mb_y=0;
2445     for(mb_y=0; mb_y < s->mb_height; mb_y++) {
2446         /* Put GOB header based on RTP MTU for formats which support it per line (H263*)*/
2447         /* TODO: Put all this stuff in a separate generic function */
2448         if (s->rtp_mode) {
2449             if (!mb_y) {
2450                 s->ptr_lastgob = s->pb.buf;
2451                 s->ptr_last_mb_line = s->pb.buf;
2452             } else if (s->out_format == FMT_H263 && !s->h263_pred && !s->h263_msmpeg4 && !(mb_y % s->gob_index)) {
2453                 // MN: we could move the space check from h263 -> here, as its not h263 specific
2454                 last_gob = h263_encode_gob_header(s, mb_y);
2455                 if (last_gob) {
2456                     s->first_slice_line = 1;
2457                 }else{
2458                     /*MN: we reset it here instead at the end of each line cuz mpeg4 can have
2459                           slice lines starting & ending in the middle*/
2460                     s->first_slice_line = 0;
2461                 }
2462             }
2463         }
2464
2465         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
2466         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
2467
2468         s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
2469         s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
2470         s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
2471         s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
2472         s->block_index[4]= s->block_wrap[4]*(mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2);
2473         s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
2474         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2475             const int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
2476             const int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
2477 //            int d;
2478             int dmin=10000000;
2479
2480             s->mb_x = mb_x;
2481             s->mb_y = mb_y;
2482             s->block_index[0]+=2;
2483             s->block_index[1]+=2;
2484             s->block_index[2]+=2;
2485             s->block_index[3]+=2;
2486             s->block_index[4]++;
2487             s->block_index[5]++;
2488
2489             /* write gob / video packet header for formats which support it at any MB (MPEG4) */
2490             if(s->rtp_mode && s->mb_y>0 && s->codec_id==CODEC_ID_MPEG4){
2491                 int pdif= pbBufPtr(&s->pb) - s->ptr_lastgob;
2492
2493                 //the *2 is there so we stay below the requested size
2494                 if(pdif + s->mb_line_avgsize/s->mb_width >= s->rtp_payload_size){
2495                     if(s->codec_id==CODEC_ID_MPEG4){
2496                         if(s->data_partitioning && s->pict_type!=B_TYPE){
2497                             ff_mpeg4_merge_partitions(s);
2498                             ff_mpeg4_init_partitions(s);
2499                         }
2500                         ff_mpeg4_encode_video_packet_header(s);
2501
2502                         if(s->flags&CODEC_FLAG_PASS1){
2503                             int bits= get_bit_count(&s->pb);
2504                             s->misc_bits+= bits - s->last_bits;
2505                             s->last_bits= bits;
2506                         }
2507                         ff_mpeg4_clean_buffers(s);
2508                     }
2509                     s->ptr_lastgob = pbBufPtr(&s->pb);
2510                     s->first_slice_line=1;
2511                     s->resync_mb_x=mb_x;
2512                     s->resync_mb_y=mb_y;
2513                 }
2514
2515                 if(  (s->resync_mb_x   == s->mb_x)
2516                    && s->resync_mb_y+1 == s->mb_y){
2517                     s->first_slice_line=0;
2518                 }
2519             }
2520
2521             if(mb_type & (mb_type-1)){ // more than 1 MB type possible
2522                 int next_block=0;
2523                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2524
2525                 copy_context_before_encode(&backup_s, s, -1);
2526                 backup_s.pb= s->pb;
2527                 best_s.data_partitioning= s->data_partitioning;
2528                 if(s->data_partitioning){
2529                     backup_s.pb2= s->pb2;
2530                     backup_s.tex_pb= s->tex_pb;
2531                 }
2532
2533                 if(mb_type&MB_TYPE_INTER){
2534                     s->mv_dir = MV_DIR_FORWARD;
2535                     s->mv_type = MV_TYPE_16X16;
2536                     s->mb_intra= 0;
2537                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2538                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2539                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb,
2540                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2541                 }
2542                 if(mb_type&MB_TYPE_INTER4V){
2543                     s->mv_dir = MV_DIR_FORWARD;
2544                     s->mv_type = MV_TYPE_8X8;
2545                     s->mb_intra= 0;
2546                     for(i=0; i<4; i++){
2547                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
2548                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
2549                     }
2550                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb,
2551                                  &dmin, &next_block, 0, 0);
2552                 }
2553                 if(mb_type&MB_TYPE_FORWARD){
2554                     s->mv_dir = MV_DIR_FORWARD;
2555                     s->mv_type = MV_TYPE_16X16;
2556                     s->mb_intra= 0;
2557                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2558                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2559                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb,
2560                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2561                 }
2562                 if(mb_type&MB_TYPE_BACKWARD){
2563                     s->mv_dir = MV_DIR_BACKWARD;
2564                     s->mv_type = MV_TYPE_16X16;
2565                     s->mb_intra= 0;
2566                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2567                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2568                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2569                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2570                 }
2571                 if(mb_type&MB_TYPE_BIDIR){
2572                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2573                     s->mv_type = MV_TYPE_16X16;
2574                     s->mb_intra= 0;
2575                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2576                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2577                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2578                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2579                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb,
2580                                  &dmin, &next_block, 0, 0);
2581                 }
2582                 if(mb_type&MB_TYPE_DIRECT){
2583                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2584                     s->mv_type = MV_TYPE_16X16; //FIXME
2585                     s->mb_intra= 0;
2586                     s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
2587                     s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
2588                     s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
2589                     s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
2590                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb,
2591                                  &dmin, &next_block, s->b_direct_mv_table[xy][0], s->b_direct_mv_table[xy][1]);
2592                 }
2593                 if(mb_type&MB_TYPE_INTRA){
2594                     s->mv_dir = MV_DIR_FORWARD;
2595                     s->mv_type = MV_TYPE_16X16;
2596                     s->mb_intra= 1;
2597                     s->mv[0][0][0] = 0;
2598                     s->mv[0][0][1] = 0;
2599                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb,
2600                                  &dmin, &next_block, 0, 0);
2601                     /* force cleaning of ac/dc pred stuff if needed ... */
2602                     if(s->h263_pred || s->h263_aic)
2603                         s->mbintra_table[mb_x + mb_y*s->mb_width]=1;
2604                 }
2605                 copy_context_after_encode(s, &best_s, -1);
2606
2607                 pb_bits_count= get_bit_count(&s->pb);
2608                 flush_put_bits(&s->pb);
2609                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2610                 s->pb= backup_s.pb;
2611
2612                 if(s->data_partitioning){
2613                     pb2_bits_count= get_bit_count(&s->pb2);
2614                     flush_put_bits(&s->pb2);
2615                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2616                     s->pb2= backup_s.pb2;
2617
2618                     tex_pb_bits_count= get_bit_count(&s->tex_pb);
2619                     flush_put_bits(&s->tex_pb);
2620                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2621                     s->tex_pb= backup_s.tex_pb;
2622                 }
2623                 s->last_bits= get_bit_count(&s->pb);
2624             } else {
2625                 int motion_x, motion_y;
2626                 s->mv_type=MV_TYPE_16X16;
2627                 // only one MB-Type possible
2628                 switch(mb_type){
2629                 case MB_TYPE_INTRA:
2630                     s->mv_dir = MV_DIR_FORWARD;
2631                     s->mb_intra= 1;
2632                     motion_x= s->mv[0][0][0] = 0;
2633                     motion_y= s->mv[0][0][1] = 0;
2634                     break;
2635                 case MB_TYPE_INTER:
2636                     s->mv_dir = MV_DIR_FORWARD;
2637                     s->mb_intra= 0;
2638                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2639                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2640                     break;
2641                 case MB_TYPE_INTER4V:
2642                     s->mv_dir = MV_DIR_FORWARD;
2643                     s->mv_type = MV_TYPE_8X8;
2644                     s->mb_intra= 0;
2645                     for(i=0; i<4; i++){
2646                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
2647                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
2648                     }
2649                     motion_x= motion_y= 0;
2650                     break;
2651                 case MB_TYPE_DIRECT:
2652                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2653                     s->mb_intra= 0;
2654                     motion_x=s->b_direct_mv_table[xy][0];
2655                     motion_y=s->b_direct_mv_table[xy][1];
2656                     s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
2657                     s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
2658                     s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
2659                     s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
2660                     break;
2661                 case MB_TYPE_BIDIR:
2662                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2663                     s->mb_intra= 0;
2664                     motion_x=0;
2665                     motion_y=0;
2666                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2667                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2668                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2669                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2670                     break;
2671                 case MB_TYPE_BACKWARD:
2672                     s->mv_dir = MV_DIR_BACKWARD;
2673                     s->mb_intra= 0;
2674                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2675                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2676                     break;
2677                 case MB_TYPE_FORWARD:
2678                     s->mv_dir = MV_DIR_FORWARD;
2679                     s->mb_intra= 0;
2680                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2681                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2682 //                    printf(" %d %d ", motion_x, motion_y);
2683                     break;
2684                 default:
2685                     motion_x=motion_y=0; //gcc warning fix
2686                     printf("illegal MB type\n");
2687                 }
2688                 encode_mb(s, motion_x, motion_y);
2689             }
2690             /* clean the MV table in IPS frames for direct mode in B frames */
2691             if(s->mb_intra /* && I,P,S_TYPE */){
2692                 s->p_mv_table[xy][0]=0;
2693                 s->p_mv_table[xy][1]=0;
2694             }
2695
2696             MPV_decode_mb(s, s->block);
2697 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_width, get_bit_count(&s->pb));
2698         }
2699
2700
2701         /* Obtain average GOB size for RTP */
2702         if (s->rtp_mode) {
2703             if (!mb_y)
2704                 s->mb_line_avgsize = pbBufPtr(&s->pb) - s->ptr_last_mb_line;
2705             else if (!(mb_y % s->gob_index)) {
2706                 s->mb_line_avgsize = (s->mb_line_avgsize + pbBufPtr(&s->pb) - s->ptr_last_mb_line) >> 1;
2707                 s->ptr_last_mb_line = pbBufPtr(&s->pb);
2708             }
2709             //fprintf(stderr, "\nMB line: %d\tSize: %u\tAvg. Size: %u", s->mb_y,
2710             //                    (s->pb.buf_ptr - s->ptr_last_mb_line), s->mb_line_avgsize);
2711             if(s->codec_id!=CODEC_ID_MPEG4) s->first_slice_line = 0; //FIXME clean
2712         }
2713     }
2714     emms_c();
2715
2716     if(s->codec_id==CODEC_ID_MPEG4 && s->data_partitioning && s->pict_type!=B_TYPE)
2717         ff_mpeg4_merge_partitions(s);
2718
2719     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
2720         msmpeg4_encode_ext_header(s);
2721
2722     if(s->codec_id==CODEC_ID_MPEG4)
2723         ff_mpeg4_stuffing(&s->pb);
2724
2725     //if (s->gob_number)
2726     //    fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
2727
2728     /* Send the last GOB if RTP */
2729     if (s->rtp_mode) {
2730         flush_put_bits(&s->pb);
2731         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
2732         /* Call the RTP callback to send the last GOB */
2733         if (s->rtp_callback)
2734             s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number);
2735         s->ptr_lastgob = pbBufPtr(&s->pb);
2736         //fprintf(stderr,"\nGOB: %2d size: %d (last)", s->gob_number, pdif);
2737     }
2738 }
2739
2740 static int dct_quantize_c(MpegEncContext *s,
2741                         DCTELEM *block, int n,
2742                         int qscale, int *overflow)
2743 {
2744     int i, j, level, last_non_zero, q;
2745     const int *qmat;
2746     int bias;
2747     int max=0;
2748     unsigned int threshold1, threshold2;
2749
2750     s->fdct (block);
2751
2752 #ifndef ARCH_ALPHA              /* Alpha uses unpermuted matrix */
2753     /* we need this permutation so that we correct the IDCT
2754        permutation. will be moved into DCT code */
2755     block_permute(block);
2756 #endif
2757
2758     if (s->mb_intra) {
2759         if (!s->h263_aic) {
2760             if (n < 4)
2761                 q = s->y_dc_scale;
2762             else
2763                 q = s->c_dc_scale;
2764             q = q << 3;
2765         } else
2766             /* For AIC we skip quant/dequant of INTRADC */
2767             q = 1 << 3;
2768
2769         /* note: block[0] is assumed to be positive */
2770         block[0] = (block[0] + (q >> 1)) / q;
2771         i = 1;
2772         last_non_zero = 0;
2773         qmat = s->q_intra_matrix[qscale];
2774         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
2775     } else {
2776         i = 0;
2777         last_non_zero = -1;
2778         qmat = s->q_inter_matrix[qscale];
2779         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
2780     }
2781     threshold1= (1<<QMAT_SHIFT) - bias - 1;
2782     threshold2= (threshold1<<1);
2783
2784     for(;i<64;i++) {
2785         j = zigzag_direct[i];
2786         level = block[j];
2787         level = level * qmat[j];
2788
2789 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
2790 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
2791         if(((unsigned)(level+threshold1))>threshold2){
2792             if(level>0){
2793                 level= (bias + level)>>QMAT_SHIFT;
2794                 block[j]= level;
2795             }else{
2796                 level= (bias - level)>>QMAT_SHIFT;
2797                 block[j]= -level;
2798             }
2799             max |=level;
2800             last_non_zero = i;
2801         }else{
2802             block[j]=0;
2803         }
2804     }
2805     *overflow= s->max_qcoeff < max; //overflow might have happend
2806
2807     return last_non_zero;
2808 }
2809
2810 static void dct_unquantize_mpeg1_c(MpegEncContext *s,
2811                                    DCTELEM *block, int n, int qscale)
2812 {
2813     int i, level, nCoeffs;
2814     const UINT16 *quant_matrix;
2815
2816     if(s->alternate_scan) nCoeffs= 64;
2817     else nCoeffs= s->block_last_index[n]+1;
2818
2819     if (s->mb_intra) {
2820         if (n < 4)
2821             block[0] = block[0] * s->y_dc_scale;
2822         else
2823             block[0] = block[0] * s->c_dc_scale;
2824         /* XXX: only mpeg1 */
2825         quant_matrix = s->intra_matrix;
2826         for(i=1;i<nCoeffs;i++) {
2827             int j= zigzag_direct[i];
2828             level = block[j];
2829             if (level) {
2830                 if (level < 0) {
2831                     level = -level;
2832                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
2833                     level = (level - 1) | 1;
2834                     level = -level;
2835                 } else {
2836                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
2837                     level = (level - 1) | 1;
2838                 }
2839 #ifdef PARANOID
2840                 if (level < -2048 || level > 2047)
2841                     fprintf(stderr, "unquant error %d %d\n", i, level);
2842 #endif
2843                 block[j] = level;
2844             }
2845         }
2846     } else {
2847         i = 0;
2848         quant_matrix = s->inter_matrix;
2849         for(;i<nCoeffs;i++) {
2850             int j= zigzag_direct[i];
2851             level = block[j];
2852             if (level) {
2853                 if (level < 0) {
2854                     level = -level;
2855                     level = (((level << 1) + 1) * qscale *
2856                              ((int) (quant_matrix[j]))) >> 4;
2857                     level = (level - 1) | 1;
2858                     level = -level;
2859                 } else {
2860                     level = (((level << 1) + 1) * qscale *
2861                              ((int) (quant_matrix[j]))) >> 4;
2862                     level = (level - 1) | 1;
2863                 }
2864 #ifdef PARANOID
2865                 if (level < -2048 || level > 2047)
2866                     fprintf(stderr, "unquant error %d %d\n", i, level);
2867 #endif
2868                 block[j] = level;
2869             }
2870         }
2871     }
2872 }
2873
2874 static void dct_unquantize_mpeg2_c(MpegEncContext *s,
2875                                    DCTELEM *block, int n, int qscale)
2876 {
2877     int i, level, nCoeffs;
2878     const UINT16 *quant_matrix;
2879
2880     if(s->alternate_scan) nCoeffs= 64;
2881     else nCoeffs= s->block_last_index[n]+1;
2882
2883     if (s->mb_intra) {
2884         if (n < 4)
2885             block[0] = block[0] * s->y_dc_scale;
2886         else
2887             block[0] = block[0] * s->c_dc_scale;
2888         quant_matrix = s->intra_matrix;
2889         for(i=1;i<nCoeffs;i++) {
2890             int j= zigzag_direct[i];
2891             level = block[j];
2892             if (level) {
2893                 if (level < 0) {
2894                     level = -level;
2895                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
2896                     level = -level;
2897                 } else {
2898                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
2899                 }
2900 #ifdef PARANOID
2901                 if (level < -2048 || level > 2047)
2902                     fprintf(stderr, "unquant error %d %d\n", i, level);
2903 #endif
2904                 block[j] = level;
2905             }
2906         }
2907     } else {
2908         int sum=-1;
2909         i = 0;
2910         quant_matrix = s->inter_matrix;
2911         for(;i<nCoeffs;i++) {
2912             int j= zigzag_direct[i];
2913             level = block[j];
2914             if (level) {
2915                 if (level < 0) {
2916                     level = -level;
2917                     level = (((level << 1) + 1) * qscale *
2918                              ((int) (quant_matrix[j]))) >> 4;
2919                     level = -level;
2920                 } else {
2921                     level = (((level << 1) + 1) * qscale *
2922                              ((int) (quant_matrix[j]))) >> 4;
2923                 }
2924 #ifdef PARANOID
2925                 if (level < -2048 || level > 2047)
2926                     fprintf(stderr, "unquant error %d %d\n", i, level);
2927 #endif
2928                 block[j] = level;
2929                 sum+=level;
2930             }
2931         }
2932         block[63]^=sum&1;
2933     }
2934 }
2935
2936
2937 static void dct_unquantize_h263_c(MpegEncContext *s,
2938                                   DCTELEM *block, int n, int qscale)
2939 {
2940     int i, level, qmul, qadd;
2941     int nCoeffs;
2942
2943     if (s->mb_intra) {
2944         if (!s->h263_aic) {
2945             if (n < 4)
2946                 block[0] = block[0] * s->y_dc_scale;
2947             else
2948                 block[0] = block[0] * s->c_dc_scale;
2949         }
2950         i = 1;
2951         nCoeffs= 64; //does not allways use zigzag table
2952     } else {
2953         i = 0;
2954         nCoeffs= zigzag_end[ s->block_last_index[n] ];
2955     }
2956
2957     qmul = s->qscale << 1;
2958     if (s->h263_aic && s->mb_intra)
2959         qadd = 0;
2960     else
2961         qadd = (s->qscale - 1) | 1;
2962
2963     for(;i<nCoeffs;i++) {
2964         level = block[i];
2965         if (level) {
2966             if (level < 0) {
2967                 level = level * qmul - qadd;
2968             } else {
2969                 level = level * qmul + qadd;
2970             }
2971 #ifdef PARANOID
2972                 if (level < -2048 || level > 2047)
2973                     fprintf(stderr, "unquant error %d %d\n", i, level);
2974 #endif
2975             block[i] = level;
2976         }
2977     }
2978 }
2979
2980 static void remove_ac(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int mb_x, int mb_y)
2981 {
2982     int dc, dcb, dcr, y, i;
2983     for(i=0; i<4; i++){
2984         dc= s->dc_val[0][mb_x*2+1 + (i&1) + (mb_y*2+1 + (i>>1))*(s->mb_width*2+2)];
2985         for(y=0; y<8; y++){
2986             int x;
2987             for(x=0; x<8; x++){
2988                 dest_y[x + (i&1)*8 + (y + (i>>1)*8)*s->linesize]= dc/8;
2989             }
2990         }
2991     }
2992     dcb = s->dc_val[1][mb_x+1 + (mb_y+1)*(s->mb_width+2)];
2993     dcr= s->dc_val[2][mb_x+1 + (mb_y+1)*(s->mb_width+2)];
2994     for(y=0; y<8; y++){
2995         int x;
2996         for(x=0; x<8; x++){
2997             dest_cb[x + y*(s->uvlinesize)]= dcb/8;
2998             dest_cr[x + y*(s->uvlinesize)]= dcr/8;
2999         }
3000     }
3001 }
3002
3003 /**
3004  * will conceal past errors, and allso drop b frames if needed
3005  *
3006  */
3007 void ff_conceal_past_errors(MpegEncContext *s, int unknown_pos)
3008 {
3009     int mb_x= s->mb_x;
3010     int mb_y= s->mb_y;
3011     int mb_dist=0;
3012     int i, intra_count=0, inter_count=0;
3013     int intra_conceal= s->msmpeg4_version ? 50 : 50; //FIXME finetune
3014     int inter_conceal= s->msmpeg4_version ? 50 : 50;
3015
3016     // for last block
3017     if(mb_x>=s->mb_width)  mb_x= s->mb_width -1;
3018     if(mb_y>=s->mb_height) mb_y= s->mb_height-1;
3019
3020     if(s->decoding_error==0 && unknown_pos){
3021         if(s->data_partitioning && s->pict_type!=B_TYPE)
3022                 s->decoding_error= DECODING_AC_LOST;
3023         else
3024                 s->decoding_error= DECODING_DESYNC;
3025     }
3026
3027     if(s->decoding_error==DECODING_DESYNC && s->pict_type!=B_TYPE) s->next_p_frame_damaged=1;
3028
3029     for(i=mb_x + mb_y*s->mb_width; i>=0; i--){
3030         if(s->mbintra_table[i]) intra_count++;
3031         else                    inter_count++;
3032     }
3033
3034     if(s->decoding_error==DECODING_AC_LOST){
3035         intra_conceal*=2;
3036         inter_conceal*=2;
3037     }else if(s->decoding_error==DECODING_ACDC_LOST){
3038         intra_conceal*=2;
3039         inter_conceal*=2;
3040     }
3041
3042     if(unknown_pos && (intra_count<inter_count)){
3043         intra_conceal= inter_conceal= s->mb_num;
3044 //        printf("%d %d\n",intra_count, inter_count);
3045     }
3046
3047     fprintf(stderr, "concealing errors\n");
3048
3049     /* for all MBs from the current one back until the last resync marker */
3050     for(; mb_y>=0 && mb_y>=s->resync_mb_y; mb_y--){
3051         for(; mb_x>=0; mb_x--){
3052             uint8_t *dest_y  = s->current_picture[0] + (mb_y * 16*  s->linesize      ) + mb_x * 16;
3053             uint8_t *dest_cb = s->current_picture[1] + (mb_y * 8 * (s->uvlinesize)) + mb_x * 8;
3054             uint8_t *dest_cr = s->current_picture[2] + (mb_y * 8 * (s->uvlinesize)) + mb_x * 8;
3055             int mb_x_backup= s->mb_x; //FIXME pass xy to mpeg_motion
3056             int mb_y_backup= s->mb_y;
3057             s->mb_x=mb_x;
3058             s->mb_y=mb_y;
3059             if(s->mbintra_table[mb_y*s->mb_width + mb_x] && mb_dist<intra_conceal){
3060                 if(s->decoding_error==DECODING_AC_LOST){
3061                     remove_ac(s, dest_y, dest_cb, dest_cr, mb_x, mb_y);
3062 //                    printf("remove ac to %d %d\n", mb_x, mb_y);
3063                 }else{
3064                     mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
3065                                 s->last_picture, 0, 0, put_pixels_tab,
3066                                 0/*mx*/, 0/*my*/, 16);
3067                 }
3068             }
3069             else if(!s->mbintra_table[mb_y*s->mb_width + mb_x] && mb_dist<inter_conceal){
3070                 int mx=0;
3071                 int my=0;
3072
3073                 if(s->decoding_error!=DECODING_DESYNC){
3074                     int xy= mb_x*2+1 + (mb_y*2+1)*(s->mb_width*2+2);
3075                     mx= s->motion_val[ xy ][0];
3076                     my= s->motion_val[ xy ][1];
3077                 }
3078
3079                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
3080                             s->last_picture, 0, 0, put_pixels_tab,
3081                             mx, my, 16);
3082             }
3083             s->mb_x= mb_x_backup;
3084             s->mb_y= mb_y_backup;
3085
3086             if(mb_x== s->resync_mb_x && mb_y== s->resync_mb_y) return;
3087             if(!s->mbskip_table[mb_x + mb_y*s->mb_width]) mb_dist++;
3088         }
3089         mb_x=s->mb_width-1;
3090     }
3091 }
3092
3093 AVCodec mpeg1video_encoder = {
3094     "mpeg1video",
3095     CODEC_TYPE_VIDEO,
3096     CODEC_ID_MPEG1VIDEO,
3097     sizeof(MpegEncContext),
3098     MPV_encode_init,
3099     MPV_encode_picture,
3100     MPV_encode_end,
3101 };
3102
3103 AVCodec h263_encoder = {
3104     "h263",
3105     CODEC_TYPE_VIDEO,
3106     CODEC_ID_H263,
3107     sizeof(MpegEncContext),
3108     MPV_encode_init,
3109     MPV_encode_picture,
3110     MPV_encode_end,
3111 };
3112
3113 AVCodec h263p_encoder = {
3114     "h263p",
3115     CODEC_TYPE_VIDEO,
3116     CODEC_ID_H263P,
3117     sizeof(MpegEncContext),
3118     MPV_encode_init,
3119     MPV_encode_picture,
3120     MPV_encode_end,
3121 };
3122
3123 AVCodec rv10_encoder = {
3124     "rv10",
3125     CODEC_TYPE_VIDEO,
3126     CODEC_ID_RV10,
3127     sizeof(MpegEncContext),
3128     MPV_encode_init,
3129     MPV_encode_picture,
3130     MPV_encode_end,
3131 };
3132
3133 AVCodec mjpeg_encoder = {
3134     "mjpeg",
3135     CODEC_TYPE_VIDEO,
3136     CODEC_ID_MJPEG,
3137     sizeof(MpegEncContext),
3138     MPV_encode_init,
3139     MPV_encode_picture,
3140     MPV_encode_end,
3141 };
3142
3143 AVCodec mpeg4_encoder = {
3144     "mpeg4",
3145     CODEC_TYPE_VIDEO,
3146     CODEC_ID_MPEG4,
3147     sizeof(MpegEncContext),
3148     MPV_encode_init,
3149     MPV_encode_picture,
3150     MPV_encode_end,
3151 };
3152
3153 AVCodec msmpeg4v1_encoder = {
3154     "msmpeg4v1",
3155     CODEC_TYPE_VIDEO,
3156     CODEC_ID_MSMPEG4V1,
3157     sizeof(MpegEncContext),
3158     MPV_encode_init,
3159     MPV_encode_picture,
3160     MPV_encode_end,
3161 };
3162
3163 AVCodec msmpeg4v2_encoder = {
3164     "msmpeg4v2",
3165     CODEC_TYPE_VIDEO,
3166     CODEC_ID_MSMPEG4V2,
3167     sizeof(MpegEncContext),
3168     MPV_encode_init,
3169     MPV_encode_picture,
3170     MPV_encode_end,
3171 };
3172
3173 AVCodec msmpeg4v3_encoder = {
3174     "msmpeg4",
3175     CODEC_TYPE_VIDEO,
3176     CODEC_ID_MSMPEG4V3,
3177     sizeof(MpegEncContext),
3178     MPV_encode_init,
3179     MPV_encode_picture,
3180     MPV_encode_end,
3181 };
3182
3183 AVCodec wmv1_encoder = {
3184     "wmv1",
3185     CODEC_TYPE_VIDEO,
3186     CODEC_ID_WMV1,
3187     sizeof(MpegEncContext),
3188     MPV_encode_init,
3189     MPV_encode_picture,
3190     MPV_encode_end,
3191 };
3192
3193 AVCodec wmv2_encoder = {
3194     "wmv2",
3195     CODEC_TYPE_VIDEO,
3196     CODEC_ID_WMV2,
3197     sizeof(MpegEncContext),
3198     MPV_encode_init,
3199     MPV_encode_picture,
3200     MPV_encode_end,
3201 };