1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
20  */
21  
22 #include <ctype.h>
23 #include "avcodec.h"
24 #include "dsputil.h"
25 #include "mpegvideo.h"
26 #include "simple_idct.h"
27
28 #ifdef USE_FASTMEMCPY
29 #include "fastmemcpy.h"
30 #endif
31
32 //#undef NDEBUG
33 //#include <assert.h>
34
35 static void encode_picture(MpegEncContext *s, int picture_number);
36 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
37                                    DCTELEM *block, int n, int qscale);
38 static void dct_unquantize_mpeg2_c(MpegEncContext *s,
39                                    DCTELEM *block, int n, int qscale);
40 static void dct_unquantize_h263_c(MpegEncContext *s, 
41                                   DCTELEM *block, int n, int qscale);
42 static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
43 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
44 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
45
46 void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;
47
48
49 /* enable all paranoid tests for rounding, overflows, etc... */
50 //#define PARANOID
51
52 //#define DEBUG
53
54
55 /* for jpeg fast DCT */
56 #define CONST_BITS 14
57
58 static const uint16_t aanscales[64] = {
59     /* precomputed values scaled up by 14 bits */
60     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
61     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
62     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
63     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
64     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
65     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
66     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
67     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
68 };
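/* The values above appear to be round(2^14 * a[u] * a[v]), where a[] are the
 * AAN fDCT post-scale factors; a sketch (assumed derivation, not part of the
 * original source or of the build) that would regenerate the table:
 *
 *     static const double a[8] = { 1.0, 1.387039845, 1.306562965, 1.175875602,
 *                                  1.0, 0.785694958, 0.541196100, 0.275899379 };
 *     int i;
 *     for (i = 0; i < 64; i++)
 *         printf("%5d,%s", (int)(a[i >> 3] * a[i & 7] * 16384.0 + 0.5),
 *                (i & 7) == 7 ? "\n" : " ");
 */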
69
70 /* Input permutation for the simple_idct_mmx */
71 static const uint8_t simple_mmx_permutation[64]={
72         0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 
73         0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 
74         0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 
75         0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 
76         0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 
77         0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 
78         0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 
79         0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
80 };
81
82 static const uint8_t h263_chroma_roundtab[16] = {
83     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
84 };
85
86 static UINT16 (*default_mv_penalty)[MAX_MV*2+1]=NULL;
87 static UINT8 default_fcode_tab[MAX_MV*2+1];
88
89 static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
90                            const UINT16 *quant_matrix, int bias, int qmin, int qmax)
91 {
92     int qscale;
93
94     for(qscale=qmin; qscale<=qmax; qscale++){
95         int i;
96         if (s->fdct == ff_jpeg_fdct_islow) {
97             for(i=0;i<64;i++) {
98                 const int j= s->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
100                 /* (the aanscales-based bounds only apply to the fdct_ifast
101                    branch below; ff_jpeg_fdct_islow output is not pre-scaled,
102                    so only the 16..7905 bound above matters here) */
103                 
104                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / 
105                                 (qscale * quant_matrix[j]));
106             }
107         } else if (s->fdct == fdct_ifast) {
108             for(i=0;i<64;i++) {
109                 const int j= s->idct_permutation[i];
110                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
111                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
112                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
113                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
114                 
115                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / 
116                                 (aanscales[i] * qscale * quant_matrix[j]));
117             }
118         } else {
119             for(i=0;i<64;i++) {
120                 const int j= s->idct_permutation[i];
121                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
122                    So 16           <= qscale * quant_matrix[i]             <= 7905
123                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
124                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
125                 */
126                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
127 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
128                 qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
129
130                 if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
131                 qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
132             }
133         }
134     }
135 }
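/* How the tables above are meant to be used (a sketch of the idea, not the
 * exact quantizer code): since
 *     qmat[qscale][i] ~= (1 << QMAT_SHIFT) / (qscale * quant_matrix[j])
 * (j being the permuted index), the quantizer can replace a per-coefficient
 * division by a multiply plus shift, roughly
 *     level ~= (coeff * qmat[qscale][i]) >> QMAT_SHIFT;
 * for fdct_ifast the transform output carries an extra aanscales[i]/2^14
 * factor, which is why that branch folds aanscales into qmat as well. */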
136 // perhaps move this into common.c
137 #define CHECKED_ALLOCZ(p, size)\
138 {\
139     p= av_mallocz(size);\
140     if(p==NULL){\
141         perror("malloc");\
142         goto fail;\
143     }\
144 }
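/* Usage note (added, not original commentary): CHECKED_ALLOCZ expands to a
 * zeroed allocation plus an error check, so every function using it must
 * provide a local "fail:" label, e.g.
 *
 *     CHECKED_ALLOCZ(s->mb_type, s->mb_num * sizeof(UINT8))
 *     ...
 *     return 0;
 * fail:
 *     return -1;
 */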
145
146 void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable){
147     int i;
148     int end;
149     
150     st->scantable= src_scantable;
151
152     for(i=0; i<64; i++){
153         int j;
154         j = src_scantable[i];
155         st->permutated[i] = s->idct_permutation[j];
156 #ifdef ARCH_POWERPC
157         st->inverse[j] = i;
158 #endif
159     }
160     
161     end=-1;
162     for(i=0; i<64; i++){
163         int j;
164         j = st->permutated[i];
165         if(j>end) end=j;
166         st->raster_end[i]= end;
167     }
168 }
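/* Illustration (derived from the code above, not original commentary):
 *
 *     ScanTable st;
 *     ff_init_scantable(s, &st, ff_zigzag_direct);
 *     // st.permutated[i] = s->idct_permutation[ ff_zigzag_direct[i] ]
 *     // st.raster_end[i] = max(st.permutated[0..i]), i.e. the highest
 *     //                    permuted index reached after i+1 scan positions
 *
 * with FF_NO_IDCT_PERM the permutated table is simply the scan itself. */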
169
170 /* XXX: these functions should be removed ASAP once all IDCTs are
171  converted */
172 // FIXME: this is an ugly hack using local statics
173 static void (*ff_put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
174 static void (*ff_add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
175 static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
176 {
177     j_rev_dct (block);
178     ff_put_pixels_clamped(block, dest, line_size);
179 }
180 static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
181 {
182     j_rev_dct (block);
183     ff_add_pixels_clamped(block, dest, line_size);
184 }
185
186 /* init common dct for both encoder and decoder */
187 int DCT_common_init(MpegEncContext *s)
188 {
189     int i;
190
191     ff_put_pixels_clamped = s->dsp.put_pixels_clamped;
192     ff_add_pixels_clamped = s->dsp.add_pixels_clamped;
193
194     s->dct_unquantize_h263 = dct_unquantize_h263_c;
195     s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
196     s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
197     s->dct_quantize= dct_quantize_c;
198
199     if(s->avctx->dct_algo==FF_DCT_FASTINT)
200         s->fdct = fdct_ifast;
201     else
202         s->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
203
204     if(s->avctx->idct_algo==FF_IDCT_INT){
205         s->idct_put= ff_jref_idct_put;
206         s->idct_add= ff_jref_idct_add;
207         s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
208     }else{ //accurate/default
209         s->idct_put= simple_idct_put;
210         s->idct_add= simple_idct_add;
211         s->idct_permutation_type= FF_NO_IDCT_PERM;
212     }
213         
214 #ifdef HAVE_MMX
215     MPV_common_init_mmx(s);
216 #endif
217 #ifdef ARCH_ALPHA
218     MPV_common_init_axp(s);
219 #endif
220 #ifdef HAVE_MLIB
221     MPV_common_init_mlib(s);
222 #endif
223 #ifdef HAVE_MMI
224     MPV_common_init_mmi(s);
225 #endif
226 #ifdef ARCH_ARMV4L
227     MPV_common_init_armv4l(s);
228 #endif
229 #ifdef ARCH_POWERPC
230     MPV_common_init_ppc(s);
231 #endif
232
233     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
234         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
235     }
236
237     switch(s->idct_permutation_type){
238     case FF_NO_IDCT_PERM:
239         for(i=0; i<64; i++)
240             s->idct_permutation[i]= i;
241         break;
242     case FF_LIBMPEG2_IDCT_PERM:
243         for(i=0; i<64; i++)
244             s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
245         break;
246     case FF_SIMPLE_IDCT_PERM:
247         for(i=0; i<64; i++)
248             s->idct_permutation[i]= simple_mmx_permutation[i];
249         break;
250     case FF_TRANSPOSE_IDCT_PERM:
251         for(i=0; i<64; i++)
252             s->idct_permutation[i]= ((i&7)<<3) | (i>>3);
253         break;
254     default:
255         fprintf(stderr, "Internal error, IDCT permutation not set\n");
256         return -1;
257     }
258
259
260     /* load & permute the scantables
261        note: only WMV uses different ones 
262     */
263     ff_init_scantable(s, &s->inter_scantable  , ff_zigzag_direct);
264     ff_init_scantable(s, &s->intra_scantable  , ff_zigzag_direct);
265     ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_horizontal_scan);
266     ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan);
267
268     return 0;
269 }
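/* Worked example of the permutation formulas above (added note, not in the
 * original source): FF_TRANSPOSE_IDCT_PERM maps index i = 8*row + col to
 * 8*col + row, the transposed position; FF_LIBMPEG2_IDCT_PERM bit-swaps the
 * low three bits (the column), e.g.
 *     i = 3 (binary 011) -> (3 & 0x38) | ((3 & 6) >> 1) | ((3 & 1) << 2)
 *                         = 0 | 1 | 4 = 5
 * so coefficient 3 of the scan is stored where that IDCT expects it. */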
270
271 /**
272  * allocates a Picture
273  * The pixels are allocated/set by calling get_buffer() if shared=0
274  */
275 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
276     
277     if(shared){
278         assert(pic->data[0]);
279         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
280         pic->type= FF_BUFFER_TYPE_SHARED;
281     }else{
282         int r;
283         
284         assert(!pic->data[0]);
285         
286         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
287         
288         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
289             fprintf(stderr, "get_buffer() failed (%d %d %d %X)\n", r, pic->age, pic->type, (int)pic->data[0]);
290             return -1;
291         }
292
293         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
294             fprintf(stderr, "get_buffer() failed (stride changed)\n");
295             return -1;
296         }
297
298         if(pic->linesize[1] != pic->linesize[2]){
299             fprintf(stderr, "get_buffer() failed (uv stride mismatch)\n");
300             return -1;
301         }
302
303         s->linesize  = pic->linesize[0];
304         s->uvlinesize= pic->linesize[1];
305     }
306     
307     if(pic->qscale_table==NULL){
308         if (s->encoding) {        
309             CHECKED_ALLOCZ(pic->mb_var   , s->mb_num * sizeof(INT16))
310             CHECKED_ALLOCZ(pic->mc_mb_var, s->mb_num * sizeof(INT16))
311             CHECKED_ALLOCZ(pic->mb_mean  , s->mb_num * sizeof(INT8))
312         }
313
314         CHECKED_ALLOCZ(pic->mbskip_table , s->mb_num * sizeof(UINT8)+1) //the +1 is for the slice end check
315         CHECKED_ALLOCZ(pic->qscale_table , s->mb_num * sizeof(UINT8))
316         pic->qstride= s->mb_width;
317     }
318     
319     return 0;
320 fail: //for the CHECKED_ALLOCZ macro
321     return -1;
322 }
323
324 /**
325  * deallocates a picture
326  */
327 static void free_picture(MpegEncContext *s, Picture *pic){
328     int i;
329
330     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
331         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
332     }
333
334     av_freep(&pic->mb_var);
335     av_freep(&pic->mc_mb_var);
336     av_freep(&pic->mb_mean);
337     av_freep(&pic->mbskip_table);
338     av_freep(&pic->qscale_table);
339     
340     if(pic->type == FF_BUFFER_TYPE_INTERNAL){
341         for(i=0; i<4; i++){
342             av_freep(&pic->base[i]);
343             pic->data[i]= NULL;
344         }
345         av_freep(&pic->opaque);
346         pic->type= 0;
347     }else if(pic->type == FF_BUFFER_TYPE_SHARED){
348         for(i=0; i<4; i++){
349             pic->base[i]=
350             pic->data[i]= NULL;
351         }
352         pic->type= 0;        
353     }
354 }
355
356 /* init common structure for both encoder and decoder */
357 int MPV_common_init(MpegEncContext *s)
358 {
359     int y_size, c_size, yc_size, i;
360
361     dsputil_init(&s->dsp, s->avctx->dsp_mask);
362     DCT_common_init(s);
363
364     s->flags= s->avctx->flags;
365
366     s->mb_width  = (s->width  + 15) / 16;
367     s->mb_height = (s->height + 15) / 16;
368
369     /* set default edge pos, will be overridden in decode_header if needed */
370     s->h_edge_pos= s->mb_width*16;
371     s->v_edge_pos= s->mb_height*16;
372
373     s->mb_num = s->mb_width * s->mb_height;
374
375     y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
376     c_size = (s->mb_width + 2) * (s->mb_height + 2);
377     yc_size = y_size + 2 * c_size;
378
379     /* convert fourcc to upper case */
380     s->avctx->fourcc=   toupper( s->avctx->fourcc     &0xFF)          
381                      + (toupper((s->avctx->fourcc>>8 )&0xFF)<<8 )
382                      + (toupper((s->avctx->fourcc>>16)&0xFF)<<16) 
383                      + (toupper((s->avctx->fourcc>>24)&0xFF)<<24);
384
385     CHECKED_ALLOCZ(s->edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
386
387     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
388
389     if (s->encoding) {
390         int mv_table_size= (s->mb_width+2)*(s->mb_height+2);
391
392         /* Allocate MV tables */
393         CHECKED_ALLOCZ(s->p_mv_table            , mv_table_size * 2 * sizeof(INT16))
394         CHECKED_ALLOCZ(s->b_forw_mv_table       , mv_table_size * 2 * sizeof(INT16))
395         CHECKED_ALLOCZ(s->b_back_mv_table       , mv_table_size * 2 * sizeof(INT16))
396         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table , mv_table_size * 2 * sizeof(INT16))
397         CHECKED_ALLOCZ(s->b_bidir_back_mv_table , mv_table_size * 2 * sizeof(INT16))
398         CHECKED_ALLOCZ(s->b_direct_mv_table     , mv_table_size * 2 * sizeof(INT16))
399
400         //FIXME this should be linesize instead of s->width*2, but that isn't known before get_buffer()
401         CHECKED_ALLOCZ(s->me.scratchpad,  s->width*2*16*3*sizeof(uint8_t)) 
402         
403         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
404         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
405
406         if(s->codec_id==CODEC_ID_MPEG4){
407             CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
408             CHECKED_ALLOCZ(   s->pb2_buffer, PB_BUFFER_SIZE);
409         }
410         
411         if(s->msmpeg4_version){
412             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
413         }
414         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
415     }
416         
417     CHECKED_ALLOCZ(s->error_status_table, s->mb_num*sizeof(UINT8))
418     
419     if (s->out_format == FMT_H263 || s->encoding) {
420         int size;
421         /* Allocate MB type table */
422         CHECKED_ALLOCZ(s->mb_type  , s->mb_num * sizeof(UINT8))
423
424         /* MV prediction */
425         size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
426         CHECKED_ALLOCZ(s->motion_val, size * 2 * sizeof(INT16));
427     }
428
429     if(s->codec_id==CODEC_ID_MPEG4){
430         /* interlaced direct mode decoding tables */
431         CHECKED_ALLOCZ(s->field_mv_table, s->mb_num*2*2 * sizeof(INT16))
432         CHECKED_ALLOCZ(s->field_select_table, s->mb_num*2* sizeof(INT8))
433     }
434     /* 4MV B-frame decoding table */
435     //note: this is needed for H.263 without B-frames too (segfault on damaged streams otherwise)
436     CHECKED_ALLOCZ(s->co_located_type_table, s->mb_num * sizeof(UINT8))
437     if (s->out_format == FMT_H263) {
438         /* ac values */
439         CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(INT16) * 16);
440         s->ac_val[1] = s->ac_val[0] + y_size;
441         s->ac_val[2] = s->ac_val[1] + c_size;
442         
443         /* cbp values */
444         CHECKED_ALLOCZ(s->coded_block, y_size);
445         
446         /* divx501 bitstream reorder buffer */
447         CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
448
449         /* cbp, ac_pred, pred_dir */
450         CHECKED_ALLOCZ(s->cbp_table  , s->mb_num * sizeof(UINT8))
451         CHECKED_ALLOCZ(s->pred_dir_table, s->mb_num * sizeof(UINT8))
452     }
453     
454     if (s->h263_pred || s->h263_plus || !s->encoding) {
455         /* dc values */
456         //MN: we need these for error resilience of intra-frames
457         CHECKED_ALLOCZ(s->dc_val[0], yc_size * sizeof(INT16));
458         s->dc_val[1] = s->dc_val[0] + y_size;
459         s->dc_val[2] = s->dc_val[1] + c_size;
460         for(i=0;i<yc_size;i++)
461             s->dc_val[0][i] = 1024;
462     }
463
464     /* which MBs are intra blocks */
465     CHECKED_ALLOCZ(s->mbintra_table, s->mb_num);
466     memset(s->mbintra_table, 1, s->mb_num);
467     
468     /* default structure is frame */
469     s->picture_structure = PICT_FRAME;
470     
471     /* init macroblock skip table */
472     CHECKED_ALLOCZ(s->mbskip_table, s->mb_num+1);
473     //Note the +1 is for a quicker mpeg4 slice_end detection
474     
475     s->block= s->blocks[0];
476
477     s->parse_context.state= -1;
478
479     s->context_initialized = 1;
480     return 0;
481  fail:
482     MPV_common_end(s);
483     return -1;
484 }
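/* Typical call order for a codec built on this file (a sketch under the usual
 * conventions here, not copied from any specific codec):
 *
 *     s->avctx  = avctx;
 *     s->width  = avctx->width;
 *     s->height = avctx->height;
 *     s->out_format = FMT_H263;            // or FMT_MPEG1, FMT_MJPEG, ...
 *     if (MPV_common_init(s) < 0)          // on failure it cleans up via
 *         return -1;                       // MPV_common_end() itself
 */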
485
486
487 //extern int sads;
488
489 /* free the common structure for both encoder and decoder */
490 void MPV_common_end(MpegEncContext *s)
491 {
492     int i;
493
494     av_freep(&s->mb_type);
495     av_freep(&s->p_mv_table);
496     av_freep(&s->b_forw_mv_table);
497     av_freep(&s->b_back_mv_table);
498     av_freep(&s->b_bidir_forw_mv_table);
499     av_freep(&s->b_bidir_back_mv_table);
500     av_freep(&s->b_direct_mv_table);
501     av_freep(&s->motion_val);
502     av_freep(&s->dc_val[0]);
503     av_freep(&s->ac_val[0]);
504     av_freep(&s->coded_block);
505     av_freep(&s->mbintra_table);
506     av_freep(&s->cbp_table);
507     av_freep(&s->pred_dir_table);
508     av_freep(&s->me.scratchpad);
509     av_freep(&s->me.map);
510     av_freep(&s->me.score_map);
511     
512     av_freep(&s->mbskip_table);
513     av_freep(&s->bitstream_buffer);
514     av_freep(&s->tex_pb_buffer);
515     av_freep(&s->pb2_buffer);
516     av_freep(&s->edge_emu_buffer);
517     av_freep(&s->co_located_type_table);
518     av_freep(&s->field_mv_table);
519     av_freep(&s->field_select_table);
520     av_freep(&s->avctx->stats_out);
521     av_freep(&s->ac_stats);
522     av_freep(&s->error_status_table);
523
524     for(i=0; i<MAX_PICTURE_COUNT; i++){
525         free_picture(s, &s->picture[i]);
526     }
527     s->context_initialized = 0;
528 }
529
530 /* init video encoder */
531 int MPV_encode_init(AVCodecContext *avctx)
532 {
533     MpegEncContext *s = avctx->priv_data;
534     int i;
535
536     avctx->pix_fmt = PIX_FMT_YUV420P;
537
538     s->bit_rate = avctx->bit_rate;
539     s->bit_rate_tolerance = avctx->bit_rate_tolerance;
540     s->frame_rate = avctx->frame_rate;
541     s->width = avctx->width;
542     s->height = avctx->height;
543     if(avctx->gop_size > 600){
544         fprintf(stderr, "Warning: keyframe interval too large, reducing it ...\n");
545         avctx->gop_size=600;
546     }
547     s->gop_size = avctx->gop_size;
548     s->rtp_mode = avctx->rtp_mode;
549     s->rtp_payload_size = avctx->rtp_payload_size;
550     if (avctx->rtp_callback)
551         s->rtp_callback = avctx->rtp_callback;
552     s->qmin= avctx->qmin;
553     s->qmax= avctx->qmax;
554     s->max_qdiff= avctx->max_qdiff;
555     s->qcompress= avctx->qcompress;
556     s->qblur= avctx->qblur;
557     s->avctx = avctx;
558     s->flags= avctx->flags;
559     s->max_b_frames= avctx->max_b_frames;
560     s->b_frame_strategy= avctx->b_frame_strategy;
561     s->codec_id= avctx->codec->id;
562     s->luma_elim_threshold  = avctx->luma_elim_threshold;
563     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
564     s->strict_std_compliance= avctx->strict_std_compliance;
565     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
566     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
567     s->mpeg_quant= avctx->mpeg_quant;
568
569     if (s->gop_size <= 1) {
570         s->intra_only = 1;
571         s->gop_size = 12;
572     } else {
573         s->intra_only = 0;
574     }
575
576     s->me_method = avctx->me_method;
577
578     /* Fixed QSCALE */
579     s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
580     
581     s->adaptive_quant= (   s->avctx->lumi_masking
582                         || s->avctx->dark_masking
583                         || s->avctx->temporal_cplx_masking 
584                         || s->avctx->spatial_cplx_masking
585                         || s->avctx->p_masking)
586                        && !s->fixed_qscale;
587     
588     s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
589
590     switch(avctx->codec->id) {
591     case CODEC_ID_MPEG1VIDEO:
592         s->out_format = FMT_MPEG1;
593         avctx->delay=0; //FIXME not sure, should check the spec
594         break;
595     case CODEC_ID_MJPEG:
596         s->out_format = FMT_MJPEG;
597         s->intra_only = 1; /* force intra only for jpeg */
598         s->mjpeg_write_tables = 1; /* write all tables */
599         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
600         s->mjpeg_vsample[0] = 2; /* set up default sampling factors */
601         s->mjpeg_vsample[1] = 1; /* the only currently supported values */
602         s->mjpeg_vsample[2] = 1; 
603         s->mjpeg_hsample[0] = 2;
604         s->mjpeg_hsample[1] = 1; 
605         s->mjpeg_hsample[2] = 1; 
606         if (mjpeg_init(s) < 0)
607             return -1;
608         avctx->delay=0;
609         s->low_delay=1;
610         break;
611     case CODEC_ID_H263:
612         if (h263_get_picture_format(s->width, s->height) == 7) {
613             printf("Input picture size isn't suitable for h263 codec! try h263+\n");
614             return -1;
615         }
616         s->out_format = FMT_H263;
617         avctx->delay=0;
618         s->low_delay=1;
619         break;
620     case CODEC_ID_H263P:
621         s->out_format = FMT_H263;
622         s->h263_plus = 1;
623         s->unrestricted_mv = 1;
624         s->h263_aic = 1;
625         
626         /* These are just to be sure */
627         s->umvplus = 0;
628         s->umvplus_dec = 0;
629         avctx->delay=0;
630         s->low_delay=1;
631         break;
632     case CODEC_ID_RV10:
633         s->out_format = FMT_H263;
634         s->h263_rv10 = 1;
635         avctx->delay=0;
636         s->low_delay=1;
637         break;
638     case CODEC_ID_MPEG4:
639         s->out_format = FMT_H263;
640         s->h263_pred = 1;
641         s->unrestricted_mv = 1;
642         s->low_delay= s->max_b_frames ? 0 : 1;
643         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
644         break;
645     case CODEC_ID_MSMPEG4V1:
646         s->out_format = FMT_H263;
647         s->h263_msmpeg4 = 1;
648         s->h263_pred = 1;
649         s->unrestricted_mv = 1;
650         s->msmpeg4_version= 1;
651         avctx->delay=0;
652         s->low_delay=1;
653         break;
654     case CODEC_ID_MSMPEG4V2:
655         s->out_format = FMT_H263;
656         s->h263_msmpeg4 = 1;
657         s->h263_pred = 1;
658         s->unrestricted_mv = 1;
659         s->msmpeg4_version= 2;
660         avctx->delay=0;
661         s->low_delay=1;
662         break;
663     case CODEC_ID_MSMPEG4V3:
664         s->out_format = FMT_H263;
665         s->h263_msmpeg4 = 1;
666         s->h263_pred = 1;
667         s->unrestricted_mv = 1;
668         s->msmpeg4_version= 3;
669         avctx->delay=0;
670         s->low_delay=1;
671         break;
672     case CODEC_ID_WMV1:
673         s->out_format = FMT_H263;
674         s->h263_msmpeg4 = 1;
675         s->h263_pred = 1;
676         s->unrestricted_mv = 1;
677         s->msmpeg4_version= 4;
678         avctx->delay=0;
679         s->low_delay=1;
680         break;
681     case CODEC_ID_WMV2:
682         s->out_format = FMT_H263;
683         s->h263_msmpeg4 = 1;
684         s->h263_pred = 1;
685         s->unrestricted_mv = 1;
686         s->msmpeg4_version= 5;
687         avctx->delay=0;
688         s->low_delay=1;
689         break;
690     default:
691         return -1;
692     }
693     
694     { /* set up some safe defaults; some codecs might override them later */
695         static int done=0;
696         if(!done){
697             int i;
698             done=1;
699
700             default_mv_penalty= av_mallocz( sizeof(UINT16)*(MAX_FCODE+1)*(2*MAX_MV+1) );
701             memset(default_mv_penalty, 0, sizeof(UINT16)*(MAX_FCODE+1)*(2*MAX_MV+1));
702             memset(default_fcode_tab , 0, sizeof(UINT8)*(2*MAX_MV+1));
703
704             for(i=-16; i<16; i++){
705                 default_fcode_tab[i + MAX_MV]= 1;
706             }
707         }
708     }
709     s->me.mv_penalty= default_mv_penalty;
710     s->fcode_tab= default_fcode_tab;
711     s->y_dc_scale_table=
712     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
713  
714     /* don't use the mv_penalty table for the crap (old) ME, as it would be confused */
715     //FIXME remove this after fixing / removing the old ME
716     if (s->me_method < ME_EPZS) s->me.mv_penalty = default_mv_penalty;
717
718     s->encoding = 1;
719
720     /* init */
721     if (MPV_common_init(s) < 0)
722         return -1;
723     
724     ff_init_me(s);
725
726 #ifdef CONFIG_ENCODERS
727     if (s->out_format == FMT_H263)
728         h263_encode_init(s);
729     else if (s->out_format == FMT_MPEG1)
730         ff_mpeg1_encode_init(s);
731     if(s->msmpeg4_version)
732         ff_msmpeg4_encode_init(s);
733 #endif
734
735     /* init default q matrix */
736     for(i=0;i<64;i++) {
737         int j= s->idct_permutation[i];
738         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
739             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
740             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
741         }else if(s->out_format == FMT_H263){
742             s->intra_matrix[j] =
743             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
744         }else{ /* mpeg1 */
745             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
746             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
747         }
748     }
749
750     /* precompute matrix */
751     /* for mjpeg, we do include qscale in the matrix */
752     if (s->out_format != FMT_MJPEG) {
753         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, 
754                        s->intra_matrix, s->intra_quant_bias, 1, 31);
755         convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, 
756                        s->inter_matrix, s->inter_quant_bias, 1, 31);
757     }
758
759     if(ff_rate_control_init(s) < 0)
760         return -1;
761
762     s->picture_number = 0;
763     s->picture_in_gop_number = 0;
764     s->fake_picture_number = 0;
765     /* motion detector init */
766     s->f_code = 1;
767     s->b_code = 1;
768
769     return 0;
770 }
771
772 int MPV_encode_end(AVCodecContext *avctx)
773 {
774     MpegEncContext *s = avctx->priv_data;
775
776 #ifdef STATS
777     print_stats();
778 #endif
779
780     ff_rate_control_uninit(s);
781
782     MPV_common_end(s);
783     if (s->out_format == FMT_MJPEG)
784         mjpeg_close(s);
785       
786     return 0;
787 }
788
789 /* draw the edges of width 'w' of an image of size width, height */
790 //FIXME check that this is ok for mpeg4 interlaced
791 static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w)
792 {
793     UINT8 *ptr, *last_line;
794     int i;
795
796     last_line = buf + (height - 1) * wrap;
797     for(i=0;i<w;i++) {
798         /* top and bottom */
799         memcpy(buf - (i + 1) * wrap, buf, width);
800         memcpy(last_line + (i + 1) * wrap, last_line, width);
801     }
802     /* left and right */
803     ptr = buf;
804     for(i=0;i<height;i++) {
805         memset(ptr - w, ptr[0], w);
806         memset(ptr + width, ptr[width-1], w);
807         ptr += wrap;
808     }
809     /* corners */
810     for(i=0;i<w;i++) {
811         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
812         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
813         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
814         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
815     }
816 }
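/* Illustration (added, not in the original source): for w=2 the padded frame
 * looks like
 *
 *        a a | a b c ... z | z z
 *        a a | a b c ... z | z z
 *        ----+-------------+----
 *        a a | a b c ... z | z z      <- first picture row
 *
 * i.e. each border pixel repeats the nearest picture pixel and the corners
 * repeat the corner pixel, which keeps motion compensation that reads slightly
 * outside the picture well defined. */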
817
818 static int find_unused_picture(MpegEncContext *s, int shared){
819     int i;
820     
821     if(shared){
822         for(i=0; i<MAX_PICTURE_COUNT; i++){
823             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) break;
824         }
825     }else{
826         for(i=0; i<MAX_PICTURE_COUNT; i++){
827             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break;
828         }
829         for(i=0; i<MAX_PICTURE_COUNT; i++){
830             if(s->picture[i].data[0]==NULL) break;
831         }
832     }
833
834     assert(i<MAX_PICTURE_COUNT);
835     return i;
836 }
837
838 /* generic function for encode/decode called before a frame is coded/decoded */
839 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
840 {
841     int i;
842     AVFrame *pic;
843
844     s->mb_skiped = 0;
845     
846     /* mark&release old frames */
847     if (s->pict_type != B_TYPE && s->last_picture.data[0]) {
848         for(i=0; i<MAX_PICTURE_COUNT; i++){
849 //printf("%8X %d %d %X %X\n", s->picture[i].data[0], s->picture[i].type, i, s->next_picture.data[0], s->last_picture.data[0]);
850             if(s->picture[i].data[0] == s->last_picture.data[0]){
851 //                s->picture[i].reference=0;
852                 avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
853                 break;
854             }    
855         }
856         assert(i<MAX_PICTURE_COUNT);
857
858         /* release forgotten pictures */
859         /* if(mpeg124/h263) */
860         if(!s->encoding){
861             for(i=0; i<MAX_PICTURE_COUNT; i++){
862                 if(s->picture[i].data[0] && s->picture[i].data[0] != s->next_picture.data[0] && s->picture[i].reference){
863                     fprintf(stderr, "releasing zombie picture\n");
864                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
865                 }
866             }
867         }
868     }
869 alloc:
870     if(!s->encoding){
871         i= find_unused_picture(s, 0);
872     
873         pic= (AVFrame*)&s->picture[i];
874         pic->reference= s->pict_type != B_TYPE;
875         pic->coded_picture_number= s->current_picture.coded_picture_number+1;
876         
877         alloc_picture(s, (Picture*)pic, 0);
878
879         s->current_picture= s->picture[i];
880     }
881
882     if (s->pict_type != B_TYPE) {
883         s->last_picture= s->next_picture;
884         s->next_picture= s->current_picture;
885     }
886     
887     if(s->pict_type != I_TYPE && s->last_picture.data[0]==NULL){
888         fprintf(stderr, "warning: first frame is not a keyframe\n");
889         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
890         goto alloc;
891     }
892    
893     s->hurry_up= s->avctx->hurry_up;
894     s->error_resilience= avctx->error_resilience;
895
896     /* set the dequantizer; we can't do it during init as it might change for MPEG-4,
897        and we can't do it in the header decode as init isn't called for MPEG-4 there yet */
898     if(s->out_format == FMT_H263){
899         if(s->mpeg_quant)
900             s->dct_unquantize = s->dct_unquantize_mpeg2;
901         else
902             s->dct_unquantize = s->dct_unquantize_h263;
903     }else 
904         s->dct_unquantize = s->dct_unquantize_mpeg1;
905
906     return 0;
907 }
908
909 /* generic function for encode/decode called after a frame has been coded/decoded */
910 void MPV_frame_end(MpegEncContext *s)
911 {
912     int i;
913     /* draw the edges for correct motion prediction if the MVs point outside the picture */
914     if(s->codec_id!=CODEC_ID_SVQ1){
915         if (s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
916             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
917             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
918             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
919         }
920     }
921     emms_c();
922     
923     s->last_pict_type    = s->pict_type;
924     if(s->pict_type!=B_TYPE){
925         s->last_non_b_pict_type= s->pict_type;
926     }
927     
928     s->current_picture.quality= s->qscale; //FIXME get average of qscale_table
929     s->current_picture.pict_type= s->pict_type;
930     s->current_picture.key_frame= s->pict_type == I_TYPE;
931     
932     /* copy back current_picture variables */
933     for(i=0; i<MAX_PICTURE_COUNT; i++){
934         if(s->picture[i].data[0] == s->current_picture.data[0]){
935             s->picture[i]= s->current_picture;
936             break;
937         }    
938     }
939     assert(i<MAX_PICTURE_COUNT);
940
941     /* release non-reference frames */
942     for(i=0; i<MAX_PICTURE_COUNT; i++){
943         if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/)
944             s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
945     }
946     if(s->avctx->debug&FF_DEBUG_SKIP){
947         int x,y;        
948         for(y=0; y<s->mb_height; y++){
949             for(x=0; x<s->mb_width; x++){
950                 int count= s->mbskip_table[x + y*s->mb_width];
951                 if(count>9) count=9;
952                 printf(" %1d", count);
953             }
954             printf("\n");
955         }
956         printf("pict type: %d\n", s->pict_type);
957     }
958 }
959
960 static int get_sae(uint8_t *src, int ref, int stride){
961     int x,y;
962     int acc=0;
963     
964     for(y=0; y<16; y++){
965         for(x=0; x<16; x++){
966             acc+= ABS(src[x+y*stride] - ref);
967         }
968     }
969     
970     return acc;
971 }
972
973 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
974     int x, y, w, h;
975     int acc=0;
976     
977     w= s->width &~15;
978     h= s->height&~15;
979     
980     for(y=0; y<h; y+=16){
981         for(x=0; x<w; x+=16){
982             int offset= x + y*stride;
983             int sad = s->dsp.pix_abs16x16(src + offset, ref + offset, stride);
984             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
985             int sae = get_sae(src + offset, mean, stride);
986             
987             acc+= sae + 500 < sad;
988         }
989     }
990     return acc;
991 }
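/* Interpretation (added note): get_intra_count() counts the macroblocks for
 * which coding from the block's own mean (get_sae) would clearly beat coding
 * from the previous frame (SAD), i.e. blocks that look "new"; the
 * b_frame_strategy==1 code below uses it as a cheap scene-change heuristic to
 * decide how many B-frames to insert. */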
992
993
994 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
995     AVFrame *pic;
996     int i;
997     const int encoding_delay= s->max_b_frames;
998     int direct=1;
999
1000     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
1001     if(pic_arg->linesize[0] != s->linesize) direct=0;
1002     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
1003     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
1004   
1005 //    printf("%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
1006     
1007     if(direct){
1008         i= find_unused_picture(s, 1);
1009
1010         pic= (AVFrame*)&s->picture[i];
1011         pic->reference= 1;
1012     
1013         for(i=0; i<4; i++){
1014             pic->data[i]= pic_arg->data[i];
1015             pic->linesize[i]= pic_arg->linesize[i];
1016         }
1017         alloc_picture(s, (Picture*)pic, 1);
1018     }else{
1019         i= find_unused_picture(s, 0);
1020
1021         pic= (AVFrame*)&s->picture[i];
1022         pic->reference= 1;
1023
1024         alloc_picture(s, (Picture*)pic, 0);
1025
1026         if(   pic->data[0] == pic_arg->data[0] 
1027            && pic->data[1] == pic_arg->data[1]
1028            && pic->data[2] == pic_arg->data[2]){
1029        // empty
1030         }else{
1031             int h_chroma_shift, v_chroma_shift;
1032         
1033             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1034         
1035             for(i=0; i<3; i++){
1036                 int src_stride= pic_arg->linesize[i];
1037                 int dst_stride= i ? s->uvlinesize : s->linesize;
1038                 int h_shift= i ? h_chroma_shift : 0;
1039                 int v_shift= i ? v_chroma_shift : 0;
1040                 int w= s->width >>h_shift;
1041                 int h= s->height>>v_shift;
1042                 uint8_t *src= pic_arg->data[i];
1043                 uint8_t *dst= pic->data[i];
1044             
1045                 if(src_stride==dst_stride)
1046                     memcpy(dst, src, src_stride*h);
1047                 else{
1048                     while(h--){
1049                         memcpy(dst, src, w);
1050                         dst += dst_stride;
1051                         src += src_stride;
1052                     }
1053                 }
1054             }
1055         }
1056     }
1057     pic->quality= pic_arg->quality;
1058     pic->pict_type= pic_arg->pict_type;
1059     
1060     if(s->input_picture[encoding_delay])
1061         pic->display_picture_number= s->input_picture[encoding_delay]->display_picture_number + 1;
1062
1063     /* shift buffer entries */
1064     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
1065         s->input_picture[i-1]= s->input_picture[i];
1066         
1067     s->input_picture[encoding_delay]= (Picture*)pic;
1068
1069     return 0;
1070 }
1071
1072 static void select_input_picture(MpegEncContext *s){
1073     int i;
1074     const int encoding_delay= s->max_b_frames;
1075     int coded_pic_num=0;    
1076
1077     if(s->reordered_input_picture[0])
1078         coded_pic_num= s->reordered_input_picture[0]->coded_picture_number + 1;
1079
1080     for(i=1; i<MAX_PICTURE_COUNT; i++)
1081         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
1082     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
1083
1084     /* set next picture types & ordering */
1085     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
1086         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture.data[0]==NULL || s->intra_only){
1087             s->reordered_input_picture[0]= s->input_picture[0];
1088             s->reordered_input_picture[0]->pict_type= I_TYPE;
1089             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1090         }else{
1091             int b_frames;
1092             
1093             if(s->flags&CODEC_FLAG_PASS2){
1094                 for(i=0; i<s->max_b_frames+1; i++){
1095                     int pict_num= s->input_picture[0]->display_picture_number + i;
1096                     int pict_type= s->rc_context.entry[pict_num].new_pict_type;
1097                     s->input_picture[i]->pict_type= pict_type;
1098                     
1099                     if(i + 1 >= s->rc_context.num_entries) break;
1100                 }
1101             }
1102
1103             if(s->input_picture[0]->pict_type){
1104                 /* user selected pict_type */
1105                 for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){
1106                     if(s->input_picture[b_frames]->pict_type!=B_TYPE) break;
1107                 }
1108             
1109                 if(b_frames > s->max_b_frames){
1110                     fprintf(stderr, "warning, too many bframes in a row\n");
1111                     b_frames = s->max_b_frames;
1112                 }
1113             }else if(s->b_frame_strategy==0){
1114                 b_frames= s->max_b_frames;
1115             }else if(s->b_frame_strategy==1){
1116                 for(i=1; i<s->max_b_frames+1; i++){
1117                     if(s->input_picture[i]->b_frame_score==0){
1118                         s->input_picture[i]->b_frame_score= 
1119                             get_intra_count(s, s->input_picture[i  ]->data[0], 
1120                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
1121                     }
1122                 }
1123                 for(i=0; i<s->max_b_frames; i++){
1124                     if(s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
1125                 }
1126                                 
1127                 b_frames= FFMAX(0, i-1);
1128                 
1129                 /* reset scores */
1130                 for(i=0; i<b_frames+1; i++){
1131                     s->input_picture[i]->b_frame_score=0;
1132                 }
1133             }else{
1134                 fprintf(stderr, "illegal b frame strategy\n");
1135                 b_frames=0;
1136             }
1137
1138             emms_c();
1139 //static int b_count=0;
1140 //b_count+= b_frames;
1141 //printf("b_frames: %d\n", b_count);
1142                         
1143             s->reordered_input_picture[0]= s->input_picture[b_frames];
1144             if(   s->picture_in_gop_number + b_frames >= s->gop_size 
1145                || s->reordered_input_picture[0]->pict_type== I_TYPE)
1146                 s->reordered_input_picture[0]->pict_type= I_TYPE;
1147             else
1148                 s->reordered_input_picture[0]->pict_type= P_TYPE;
1149             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1150             for(i=0; i<b_frames; i++){
1151                 coded_pic_num++;
1152                 s->reordered_input_picture[i+1]= s->input_picture[i];
1153                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
1154                 s->reordered_input_picture[i+1]->coded_picture_number= coded_pic_num;
1155             }
1156         }
1157     }
1158     
1159     if(s->reordered_input_picture[0]){
1160        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE;
1161
1162         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
1163             int i= find_unused_picture(s, 0);
1164             Picture *pic= &s->picture[i];
1165
1166             s->new_picture= *s->reordered_input_picture[0];
1167
1168             /* mark us unused / free shared pic */
1169             for(i=0; i<4; i++)
1170                 s->reordered_input_picture[0]->data[i]= NULL;
1171             s->reordered_input_picture[0]->type= 0;
1172             
1173             pic->pict_type = s->reordered_input_picture[0]->pict_type;
1174             pic->quality   = s->reordered_input_picture[0]->quality;
1175             pic->coded_picture_number = s->reordered_input_picture[0]->coded_picture_number;
1176             pic->reference = s->reordered_input_picture[0]->reference;
1177             
1178             alloc_picture(s, pic, 0);
1179
1180             s->current_picture= *pic;
1181         }else{
1182             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
1183                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
1184             
1185             s->new_picture= *s->reordered_input_picture[0];
1186
1187             for(i=0; i<4; i++){
1188                 s->reordered_input_picture[0]->data[i]-=16; //FIXME dirty
1189             }
1190             s->current_picture= *s->reordered_input_picture[0];
1191         }
1192     
1193         s->picture_number= s->new_picture.display_picture_number;
1194 //printf("dpn:%d\n", s->picture_number);
1195     }else{
1196        memset(&s->new_picture, 0, sizeof(Picture));
1197     }
1198 }
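/* Worked example (added, assuming max_b_frames=2 and b_frame_strategy=0):
 * pictures arriving in display order  I0 B1 B2 P3 B4 B5 P6 ...  are emitted in
 * coded order  I0 P3 B1 B2 P6 B4 B5 ..., i.e. reordered_input_picture[0] gets
 * the next reference frame and the delayed B-frames follow it, which is why
 * the encoder runs with an encoding_delay of max_b_frames input frames. */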
1199
1200 int MPV_encode_picture(AVCodecContext *avctx,
1201                        unsigned char *buf, int buf_size, void *data)
1202 {
1203     MpegEncContext *s = avctx->priv_data;
1204     AVFrame *pic_arg = data;
1205     int i;
1206
1207     init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
1208
1209     s->picture_in_gop_number++;
1210
1211     load_input_picture(s, pic_arg);
1212     
1213     select_input_picture(s);
1214     
1215     /* output? */
1216     if(s->new_picture.data[0]){
1217
1218         s->pict_type= s->new_picture.pict_type;
1219         if (s->fixed_qscale){ /* the ratecontrol needs the last qscale so we don't touch it for CBR */
1220             s->qscale= (int)(s->new_picture.quality+0.5);
1221             assert(s->qscale);
1222         }
1223 //emms_c();
1224 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
1225         MPV_frame_start(s, avctx);
1226
1227         encode_picture(s, s->picture_number);
1228         
1229         avctx->real_pict_num  = s->picture_number;
1230         avctx->header_bits = s->header_bits;
1231         avctx->mv_bits     = s->mv_bits;
1232         avctx->misc_bits   = s->misc_bits;
1233         avctx->i_tex_bits  = s->i_tex_bits;
1234         avctx->p_tex_bits  = s->p_tex_bits;
1235         avctx->i_count     = s->i_count;
1236         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
1237         avctx->skip_count  = s->skip_count;
1238
1239         MPV_frame_end(s);
1240
1241         if (s->out_format == FMT_MJPEG)
1242             mjpeg_picture_trailer(s);
1243         
1244         if(s->flags&CODEC_FLAG_PASS1)
1245             ff_write_pass1_stats(s);
1246     }
1247
1248     s->input_picture_number++;
1249
1250     flush_put_bits(&s->pb);
1251     s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
1252     
1253     s->total_bits += s->frame_bits;
1254     avctx->frame_bits  = s->frame_bits;
1255
1256     for(i=0; i<4; i++){
1257         avctx->error[i] += s->current_picture.error[i];
1258     }
1259     
1260     return pbBufPtr(&s->pb) - s->pb.buf;
1261 }
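/* Minimal usage sketch (added; the buffer names are made up, and in practice
 * this entry point is reached through the generic avcodec encode call rather
 * than directly):
 *
 *     uint8_t outbuf[200*1024];
 *     AVFrame in;                       // YUV420P picture to encode
 *     int size = MPV_encode_picture(avctx, outbuf, sizeof(outbuf), &in);
 *     // size = bytes written; avctx->frame_bits etc. now describe the frame
 */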
1262
1263 static inline void gmc1_motion(MpegEncContext *s,
1264                                UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
1265                                int dest_offset,
1266                                UINT8 **ref_picture, int src_offset)
1267 {
1268     UINT8 *ptr;
1269     int offset, src_x, src_y, linesize, uvlinesize;
1270     int motion_x, motion_y;
1271     int emu=0;
1272
1273     motion_x= s->sprite_offset[0][0];
1274     motion_y= s->sprite_offset[0][1];
1275     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
1276     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
1277     motion_x<<=(3-s->sprite_warping_accuracy);
1278     motion_y<<=(3-s->sprite_warping_accuracy);
1279     src_x = clip(src_x, -16, s->width);
1280     if (src_x == s->width)
1281         motion_x =0;
1282     src_y = clip(src_y, -16, s->height);
1283     if (src_y == s->height)
1284         motion_y =0;
1285
1286     linesize = s->linesize;
1287     uvlinesize = s->uvlinesize;
1288     
1289     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1290
1291     dest_y+=dest_offset;
1292     if(s->flags&CODEC_FLAG_EMU_EDGE){
1293         if(src_x<0 || src_y<0 || src_x + (motion_x&15) + 16 > s->h_edge_pos
1294                               || src_y + (motion_y&15) + 16 > s->v_edge_pos){
1295             ff_emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1296             ptr= s->edge_emu_buffer;
1297             emu=1;
1298         }
1299     }
1300     
1301     if((motion_x|motion_y)&7){
1302         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1303         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1304     }else{
1305         int dxy;
1306         
1307         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
1308         if (s->no_rounding){
1309             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
1310         }else{
1311             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
1312         }
1313     }
1314     
1315     if(s->flags&CODEC_FLAG_GRAY) return;
1316
1317     motion_x= s->sprite_offset[1][0];
1318     motion_y= s->sprite_offset[1][1];
1319     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
1320     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
1321     motion_x<<=(3-s->sprite_warping_accuracy);
1322     motion_y<<=(3-s->sprite_warping_accuracy);
1323     src_x = clip(src_x, -8, s->width>>1);
1324     if (src_x == s->width>>1)
1325         motion_x =0;
1326     src_y = clip(src_y, -8, s->height>>1);
1327     if (src_y == s->height>>1)
1328         motion_y =0;
1329
1330     offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
1331     ptr = ref_picture[1] + offset;
1332     if(emu){
1333         ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1334         ptr= s->edge_emu_buffer;
1335     }
1336     s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1337     
1338     ptr = ref_picture[2] + offset;
1339     if(emu){
1340         ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1341         ptr= s->edge_emu_buffer;
1342     }
1343     s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1344     
1345     return;
1346 }
1347
1348 static inline void gmc_motion(MpegEncContext *s,
1349                                UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
1350                                int dest_offset,
1351                                UINT8 **ref_picture, int src_offset)
1352 {
1353     UINT8 *ptr;
1354     int linesize, uvlinesize;
1355     const int a= s->sprite_warping_accuracy;
1356     int ox, oy;
1357
1358     linesize = s->linesize;
1359     uvlinesize = s->uvlinesize;
1360
1361     ptr = ref_picture[0] + src_offset;
1362
1363     dest_y+=dest_offset;
1364     
1365     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
1366     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
1367
1368     s->dsp.gmc(dest_y, ptr, linesize, 16,
1369            ox, 
1370            oy, 
1371            s->sprite_delta[0][0], s->sprite_delta[0][1],
1372            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1373            a+1, (1<<(2*a+1)) - s->no_rounding,
1374            s->h_edge_pos, s->v_edge_pos);
1375     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
1376            ox + s->sprite_delta[0][0]*8, 
1377            oy + s->sprite_delta[1][0]*8, 
1378            s->sprite_delta[0][0], s->sprite_delta[0][1],
1379            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1380            a+1, (1<<(2*a+1)) - s->no_rounding,
1381            s->h_edge_pos, s->v_edge_pos);
1382
1383     if(s->flags&CODEC_FLAG_GRAY) return;
1384
1385
1386     dest_cb+=dest_offset>>1;
1387     dest_cr+=dest_offset>>1;
1388     
1389     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
1390     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
1391
1392     ptr = ref_picture[1] + (src_offset>>1);
1393     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
1394            ox, 
1395            oy, 
1396            s->sprite_delta[0][0], s->sprite_delta[0][1],
1397            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1398            a+1, (1<<(2*a+1)) - s->no_rounding,
1399            s->h_edge_pos>>1, s->v_edge_pos>>1);
1400     
1401     ptr = ref_picture[2] + (src_offset>>1);
1402     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
1403            ox, 
1404            oy, 
1405            s->sprite_delta[0][0], s->sprite_delta[0][1],
1406            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1407            a+1, (1<<(2*a+1)) - s->no_rounding,
1408            s->h_edge_pos>>1, s->v_edge_pos>>1);
1409 }
1410
1411
1412 void ff_emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, 
1413                                     int src_x, int src_y, int w, int h){
1414     int x, y;
1415     int start_y, start_x, end_y, end_x;
1416     UINT8 *buf= s->edge_emu_buffer;
1417
1418     if(src_y>= h){
1419         src+= (h-1-src_y)*linesize;
1420         src_y=h-1;
1421     }else if(src_y<=-block_h){
1422         src+= (1-block_h-src_y)*linesize;
1423         src_y=1-block_h;
1424     }
1425     if(src_x>= w){
1426         src+= (w-1-src_x);
1427         src_x=w-1;
1428     }else if(src_x<=-block_w){
1429         src+= (1-block_w-src_x);
1430         src_x=1-block_w;
1431     }
1432
1433     start_y= FFMAX(0, -src_y);
1434     start_x= FFMAX(0, -src_x);
1435     end_y= FFMIN(block_h, h-src_y);
1436     end_x= FFMIN(block_w, w-src_x);
1437
1438     // copy existing part
1439     for(y=start_y; y<end_y; y++){
1440         for(x=start_x; x<end_x; x++){
1441             buf[x + y*linesize]= src[x + y*linesize];
1442         }
1443     }
1444
1445     //top
1446     for(y=0; y<start_y; y++){
1447         for(x=start_x; x<end_x; x++){
1448             buf[x + y*linesize]= buf[x + start_y*linesize];
1449         }
1450     }
1451
1452     //bottom
1453     for(y=end_y; y<block_h; y++){
1454         for(x=start_x; x<end_x; x++){
1455             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
1456         }
1457     }
1458                                     
1459     for(y=0; y<block_h; y++){
1460        //left
1461         for(x=0; x<start_x; x++){
1462             buf[x + y*linesize]= buf[start_x + y*linesize];
1463         }
1464        
1465        //right
1466         for(x=end_x; x<block_w; x++){
1467             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
1468         }
1469     }
1470 }
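/* Example (added): a 17x17 luma fetch whose top three rows lie above the
 * picture (src_y = -3) copies the 14 available rows into edge_emu_buffer and
 * replicates the topmost copied row into the 3 missing ones; columns outside
 * the picture are handled the same way horizontally.  The caller then reads
 * from s->edge_emu_buffer instead of the original reference pointer. */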
1471
1472
1473 /* apply one mpeg motion vector to the three components */
1474 static inline void mpeg_motion(MpegEncContext *s,
1475                                UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
1476                                int dest_offset,
1477                                UINT8 **ref_picture, int src_offset,
1478                                int field_based, op_pixels_func (*pix_op)[4],
1479                                int motion_x, int motion_y, int h)
1480 {
1481     UINT8 *ptr;
1482     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1483     int emu=0;
1484 #if 0    
1485 if(s->quarter_sample)
1486 {
1487     motion_x>>=1;
1488     motion_y>>=1;
1489 }
1490 #endif
1491     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
1492     src_x = s->mb_x * 16 + (motion_x >> 1);
1493     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
1494                 
1495     /* WARNING: do not forget the half pels */
1496     height = s->height >> field_based;
1497     v_edge_pos = s->v_edge_pos >> field_based;
1498     src_x = clip(src_x, -16, s->width);
1499     if (src_x == s->width)
1500         dxy &= ~1;
1501     src_y = clip(src_y, -16, height);
1502     if (src_y == height)
1503         dxy &= ~2;
1504     linesize   = s->linesize << field_based;
1505     uvlinesize = s->uvlinesize << field_based;
1506     ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
1507     dest_y += dest_offset;
1508
1509     if(s->flags&CODEC_FLAG_EMU_EDGE){
1510         if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
1511                               || src_y + (motion_y&1) + h  > v_edge_pos){
1512             ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, 
1513                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1514             ptr= s->edge_emu_buffer + src_offset;
1515             emu=1;
1516         }
1517     }
1518     pix_op[0][dxy](dest_y, ptr, linesize, h);
1519
1520     if(s->flags&CODEC_FLAG_GRAY) return;
1521
1522     if (s->out_format == FMT_H263) {
1523         dxy = 0;
1524         if ((motion_x & 3) != 0)
1525             dxy |= 1;
1526         if ((motion_y & 3) != 0)
1527             dxy |= 2;
1528         mx = motion_x >> 2;
1529         my = motion_y >> 2;
1530     } else {
1531         mx = motion_x / 2;
1532         my = motion_y / 2;
1533         dxy = ((my & 1) << 1) | (mx & 1);
1534         mx >>= 1;
1535         my >>= 1;
1536     }
1537     
1538     src_x = s->mb_x * 8 + mx;
1539     src_y = s->mb_y * (8 >> field_based) + my;
1540     src_x = clip(src_x, -8, s->width >> 1);
1541     if (src_x == (s->width >> 1))
1542         dxy &= ~1;
1543     src_y = clip(src_y, -8, height >> 1);
1544     if (src_y == (height >> 1))
1545         dxy &= ~2;
1546     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1547     ptr = ref_picture[1] + offset;
1548     if(emu){
1549         ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1550                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1551         ptr= s->edge_emu_buffer + (src_offset >> 1);
1552     }
1553     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1554
1555     ptr = ref_picture[2] + offset;
1556     if(emu){
1557         ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1558                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1559         ptr= s->edge_emu_buffer + (src_offset >> 1);
1560     }
1561     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1562 }
1563
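     /* Quarter-pel variant of mpeg_motion(): dxy holds two fractional bits per
      * dimension and selects one of the 16 qpix_op luma interpolators; the
      * chroma planes are still compensated with half-pel pix_op after the luma
      * vector has been reduced to half-pel accuracy (with the optional
      * qpel-chroma bug workaround). */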
1564 static inline void qpel_motion(MpegEncContext *s,
1565                                UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
1566                                int dest_offset,
1567                                UINT8 **ref_picture, int src_offset,
1568                                int field_based, op_pixels_func (*pix_op)[4],
1569                                qpel_mc_func (*qpix_op)[16],
1570                                int motion_x, int motion_y, int h)
1571 {
1572     UINT8 *ptr;
1573     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1574     int emu=0;
1575
1576     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
1577     src_x = s->mb_x * 16 + (motion_x >> 2);
1578     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
1579
1580     height = s->height >> field_based;
1581     v_edge_pos = s->v_edge_pos >> field_based;
1582     src_x = clip(src_x, -16, s->width);
1583     if (src_x == s->width)
1584         dxy &= ~3;
1585     src_y = clip(src_y, -16, height);
1586     if (src_y == height)
1587         dxy &= ~12;
1588     linesize = s->linesize << field_based;
1589     uvlinesize = s->uvlinesize << field_based;
1590     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1591     dest_y += dest_offset;
1592 //printf("%d %d %d\n", src_x, src_y, dxy);
1593     
1594     if(s->flags&CODEC_FLAG_EMU_EDGE){
1595         if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
1596                               || src_y + (motion_y&3) + h  > v_edge_pos){
1597             ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, 
1598                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1599             ptr= s->edge_emu_buffer + src_offset;
1600             emu=1;
1601         }
1602     }
1603     if(!field_based)
1604         qpix_op[0][dxy](dest_y, ptr, linesize);
1605     else{
1606         //damn interlaced mode
1607         //FIXME boundary mirroring is not exactly correct here
1608         qpix_op[1][dxy](dest_y  , ptr  , linesize);
1609         qpix_op[1][dxy](dest_y+8, ptr+8, linesize);
1610     }
1611
1612     if(s->flags&CODEC_FLAG_GRAY) return;
1613
1614     if(field_based){
1615         mx= motion_x/2;
1616         my= motion_y>>1;
1617     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
1618         mx= (motion_x>>1)|(motion_x&1);
1619         my= (motion_y>>1)|(motion_y&1);
1620     }else{
1621         mx= motion_x/2;
1622         my= motion_y/2;
1623     }
1624     mx= (mx>>1)|(mx&1);
1625     my= (my>>1)|(my&1);
1626     dxy= (mx&1) | ((my&1)<<1);
1627     mx>>=1;
1628     my>>=1;
1629
1630     src_x = s->mb_x * 8 + mx;
1631     src_y = s->mb_y * (8 >> field_based) + my;
1632     src_x = clip(src_x, -8, s->width >> 1);
1633     if (src_x == (s->width >> 1))
1634         dxy &= ~1;
1635     src_y = clip(src_y, -8, height >> 1);
1636     if (src_y == (height >> 1))
1637         dxy &= ~2;
1638
1639     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1640     ptr = ref_picture[1] + offset;
1641     if(emu){
1642         ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
1643                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1644         ptr= s->edge_emu_buffer + (src_offset >> 1);
1645     }
1646     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
1647     
1648     ptr = ref_picture[2] + offset;
1649     if(emu){
1650         ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
1651                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1652         ptr= s->edge_emu_buffer + (src_offset >> 1);
1653     }
1654     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
1655 }
1656
1657
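     /* Motion compensation dispatcher for one macroblock: 16x16 blocks go
      * through global motion (gmc/gmc1), quarter-pel, mspel (WMV2) or plain
      * half-pel compensation; 4MV macroblocks compensate four 8x8 luma blocks
      * and derive a single, specially rounded chroma vector; MV_TYPE_FIELD
      * handles the two field vectors of interlaced frame pictures. */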
1658 static inline void MPV_motion(MpegEncContext *s, 
1659                               UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
1660                               int dir, UINT8 **ref_picture, 
1661                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
1662 {
1663     int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
1664     int mb_x, mb_y, i;
1665     UINT8 *ptr, *dest;
1666     int emu=0;
1667
1668     mb_x = s->mb_x;
1669     mb_y = s->mb_y;
1670
1671     switch(s->mv_type) {
1672     case MV_TYPE_16X16:
1673         if(s->mcsel){
1674             if(s->real_sprite_warping_points==1){
1675                 gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
1676                             ref_picture, 0);
1677             }else{
1678                 gmc_motion(s, dest_y, dest_cb, dest_cr, 0,
1679                             ref_picture, 0);
1680             }
1681         }else if(s->quarter_sample){
1682             qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
1683                         ref_picture, 0,
1684                         0, pix_op, qpix_op,
1685                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
1686         }else if(s->mspel){
1687             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
1688                         ref_picture, pix_op,
1689                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
1690         }else{
1691             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
1692                         ref_picture, 0,
1693                         0, pix_op,
1694                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
1695         }           
1696         break;
1697     case MV_TYPE_8X8:
1698         mx = 0;
1699         my = 0;
1700         if(s->quarter_sample){
1701             for(i=0;i<4;i++) {
1702                 motion_x = s->mv[dir][i][0];
1703                 motion_y = s->mv[dir][i][1];
1704
1705                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
1706                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
1707                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
1708                     
1709                 /* WARNING: do not forget half pels */
1710                 src_x = clip(src_x, -16, s->width);
1711                 if (src_x == s->width)
1712                     dxy &= ~3;
1713                 src_y = clip(src_y, -16, s->height);
1714                 if (src_y == s->height)
1715                     dxy &= ~12;
1716                     
1717                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
1718                 if(s->flags&CODEC_FLAG_EMU_EDGE){
1719                     if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
1720                                           || src_y + (motion_y&3) + 8 > s->v_edge_pos){
1721                         ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1722                         ptr= s->edge_emu_buffer;
1723                     }
1724                 }
1725                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
1726                 qpix_op[1][dxy](dest, ptr, s->linesize);
1727
1728                 mx += s->mv[dir][i][0]/2;
1729                 my += s->mv[dir][i][1]/2;
1730             }
1731         }else{
1732             for(i=0;i<4;i++) {
1733                 motion_x = s->mv[dir][i][0];
1734                 motion_y = s->mv[dir][i][1];
1735
1736                 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
1737                 src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
1738                 src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
1739                     
1740                 /* WARNING: do not forget half pels */
1741                 src_x = clip(src_x, -16, s->width);
1742                 if (src_x == s->width)
1743                     dxy &= ~1;
1744                 src_y = clip(src_y, -16, s->height);
1745                 if (src_y == s->height)
1746                     dxy &= ~2;
1747                     
1748                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
1749                 if(s->flags&CODEC_FLAG_EMU_EDGE){
1750                     if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
1751                                           || src_y + (motion_y&1) + 8 > s->v_edge_pos){
1752                         ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1753                         ptr= s->edge_emu_buffer;
1754                     }
1755                 }
1756                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
1757                 pix_op[1][dxy](dest, ptr, s->linesize, 8);
1758
1759                 mx += s->mv[dir][i][0];
1760                 my += s->mv[dir][i][1];
1761             }
1762         }
1763
1764         if(s->flags&CODEC_FLAG_GRAY) break;
1765         /* In case of 8X8, we construct a single chroma motion vector
1766            with a special rounding */
1767         for(i=0;i<4;i++) {
1768         }
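             /* mx/my now hold the sum of the four luma vectors in half-pel
              * units; the chroma vector is roughly sum/8 (average of four,
              * halved for the half-resolution chroma planes): (mx>>3)&~1 keeps
              * the full-pel part and h263_chroma_roundtab[] rounds the mod-16
              * remainder to 0, 1 or 2 half-pels, matching the H.263/MPEG-4
              * style of chroma rounding. */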
1769         if (mx >= 0)
1770             mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
1771         else {
1772             mx = -mx;
1773             mx = -(h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
1774         }
1775         if (my >= 0)
1776             my = (h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
1777         else {
1778             my = -my;
1779             my = -(h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
1780         }
1781         dxy = ((my & 1) << 1) | (mx & 1);
1782         mx >>= 1;
1783         my >>= 1;
1784
1785         src_x = mb_x * 8 + mx;
1786         src_y = mb_y * 8 + my;
1787         src_x = clip(src_x, -8, s->width/2);
1788         if (src_x == s->width/2)
1789             dxy &= ~1;
1790         src_y = clip(src_y, -8, s->height/2);
1791         if (src_y == s->height/2)
1792             dxy &= ~2;
1793         
1794         offset = (src_y * (s->uvlinesize)) + src_x;
1795         ptr = ref_picture[1] + offset;
1796         if(s->flags&CODEC_FLAG_EMU_EDGE){
1797                 if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
1798                                       || src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
1799                     ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1800                     ptr= s->edge_emu_buffer;
1801                     emu=1;
1802                 }
1803             }
1804         pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);
1805
1806         ptr = ref_picture[2] + offset;
1807         if(emu){
1808             ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1809             ptr= s->edge_emu_buffer;
1810         }
1811         pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
1812         break;
1813     case MV_TYPE_FIELD:
1814         if (s->picture_structure == PICT_FRAME) {
1815             if(s->quarter_sample){
1816                 /* top field */
1817                 qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
1818                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
1819                             1, pix_op, qpix_op,
1820                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
1821                 /* bottom field */
1822                 qpel_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
1823                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
1824                             1, pix_op, qpix_op,
1825                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
1826             }else{
1827                 /* top field */       
1828                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
1829                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
1830                             1, pix_op,
1831                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
1832                 /* bottom field */
1833                 mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
1834                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
1835                             1, pix_op,
1836                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
1837             }
1838         } else {
1839             
1840
1841         }
1842         break;
1843     }
1844 }
1845
1846
1847 /* put block[] to dest[] */
1848 static inline void put_dct(MpegEncContext *s, 
1849                            DCTELEM *block, int i, UINT8 *dest, int line_size)
1850 {
1851     s->dct_unquantize(s, block, i, s->qscale);
1852     s->idct_put (dest, line_size, block);
1853 }
1854
1855 /* add block[] to dest[] */
1856 static inline void add_dct(MpegEncContext *s, 
1857                            DCTELEM *block, int i, UINT8 *dest, int line_size)
1858 {
1859     if (s->block_last_index[i] >= 0) {
1860         s->idct_add (dest, line_size, block);
1861     }
1862 }
1863
1864 static inline void add_dequant_dct(MpegEncContext *s, 
1865                            DCTELEM *block, int i, UINT8 *dest, int line_size)
1866 {
1867     if (s->block_last_index[i] >= 0) {
1868         s->dct_unquantize(s, block, i, s->qscale);
1869
1870         s->idct_add (dest, line_size, block);
1871     }
1872 }
1873
1874 /**
1875  * cleans dc, ac and coded_block for the current non-intra MB
1876  */
1877 void ff_clean_intra_table_entries(MpegEncContext *s)
1878 {
1879     int wrap = s->block_wrap[0];
1880     int xy = s->block_index[0];
1881     
1882     s->dc_val[0][xy           ] = 
1883     s->dc_val[0][xy + 1       ] = 
1884     s->dc_val[0][xy     + wrap] =
1885     s->dc_val[0][xy + 1 + wrap] = 1024;
1886     /* ac pred */
1887     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(INT16));
1888     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(INT16));
1889     if (s->msmpeg4_version>=3) {
1890         s->coded_block[xy           ] =
1891         s->coded_block[xy + 1       ] =
1892         s->coded_block[xy     + wrap] =
1893         s->coded_block[xy + 1 + wrap] = 0;
1894     }
1895     /* chroma */
1896     wrap = s->block_wrap[4];
1897     xy = s->mb_x + 1 + (s->mb_y + 1) * wrap;
1898     s->dc_val[1][xy] =
1899     s->dc_val[2][xy] = 1024;
1900     /* ac pred */
1901     memset(s->ac_val[1][xy], 0, 16 * sizeof(INT16));
1902     memset(s->ac_val[2][xy], 0, 16 * sizeof(INT16));
1903     
1904     s->mbintra_table[s->mb_x + s->mb_y*s->mb_width]= 0;
1905 }
1906
1907 /* generic function called after a macroblock has been parsed by the
1908    decoder or after it has been encoded by the encoder.
1909
1910    Important variables used:
1911    s->mb_intra : true if intra macroblock
1912    s->mv_dir   : motion vector direction
1913    s->mv_type  : motion vector type
1914    s->mv       : motion vector
1915    s->interlaced_dct : true if interlaced dct used (mpeg2)
1916  */
1917 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
1918 {
1919     int mb_x, mb_y;
1920     const int mb_xy = s->mb_y * s->mb_width + s->mb_x;
1921
1922     mb_x = s->mb_x;
1923     mb_y = s->mb_y;
1924
1925     s->current_picture.qscale_table[mb_xy]= s->qscale;
1926
1927     /* update DC predictors for P macroblocks */
1928     if (!s->mb_intra) {
1929         if (s->h263_pred || s->h263_aic) {
1930             if(s->mbintra_table[mb_xy])
1931                 ff_clean_intra_table_entries(s);
1932         } else {
1933             s->last_dc[0] =
1934             s->last_dc[1] =
1935             s->last_dc[2] = 128 << s->intra_dc_precision;
1936         }
1937     }
1938     else if (s->h263_pred || s->h263_aic)
1939         s->mbintra_table[mb_xy]=1;
1940
1941     /* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */
1942     if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) { //FIXME move into h263.c if possible, format specific stuff shouldn't be here
1943         //FIXME a lot of that is only needed for !low_delay
1944         const int wrap = s->block_wrap[0];
1945         const int xy = s->block_index[0];
1946         const int mb_index= s->mb_x + s->mb_y*s->mb_width;
1947         if(s->mv_type == MV_TYPE_8X8){
1948             s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_4MV;
1949         } else {
1950             int motion_x, motion_y;
1951             if (s->mb_intra) {
1952                 motion_x = 0;
1953                 motion_y = 0;
1954                 if(s->co_located_type_table)
1955                     s->co_located_type_table[mb_index]= 0;
1956             } else if (s->mv_type == MV_TYPE_16X16) {
1957                 motion_x = s->mv[0][0][0];
1958                 motion_y = s->mv[0][0][1];
1959                 if(s->co_located_type_table)
1960                     s->co_located_type_table[mb_index]= 0;
1961             } else /*if (s->mv_type == MV_TYPE_FIELD)*/ {
1962                 int i;
1963                 motion_x = s->mv[0][0][0] + s->mv[0][1][0];
1964                 motion_y = s->mv[0][0][1] + s->mv[0][1][1];
1965                 motion_x = (motion_x>>1) | (motion_x&1);
1966                 for(i=0; i<2; i++){
1967                     s->field_mv_table[mb_index][i][0]= s->mv[0][i][0];
1968                     s->field_mv_table[mb_index][i][1]= s->mv[0][i][1];
1969                     s->field_select_table[mb_index][i]= s->field_select[0][i];
1970                 }
1971                 s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_FIELDMV;
1972             }
1973             /* no update if 8X8 because it has been done during parsing */
1974             s->motion_val[xy][0] = motion_x;
1975             s->motion_val[xy][1] = motion_y;
1976             s->motion_val[xy + 1][0] = motion_x;
1977             s->motion_val[xy + 1][1] = motion_y;
1978             s->motion_val[xy + wrap][0] = motion_x;
1979             s->motion_val[xy + wrap][1] = motion_y;
1980             s->motion_val[xy + 1 + wrap][0] = motion_x;
1981             s->motion_val[xy + 1 + wrap][1] = motion_y;
1982         }
1983     }
1984     
1985     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
1986         UINT8 *dest_y, *dest_cb, *dest_cr;
1987         int dct_linesize, dct_offset;
1988         op_pixels_func (*op_pix)[4];
1989         qpel_mc_func (*op_qpix)[16];
1990
1991         /* avoid copy if macroblock skipped in last frame too */
1992         if (s->pict_type != B_TYPE) {
1993             s->current_picture.mbskip_table[mb_xy]= s->mb_skiped;
1994         }
1995
1996         /* only skip during decoding, as we might trash the buffers a bit during encoding */
1997         if(!s->encoding){
1998             UINT8 *mbskip_ptr = &s->mbskip_table[mb_xy];
1999             const int age= s->current_picture.age;
2000
2001             assert(age);
2002
2003             if (s->mb_skiped) {
2004                 s->mb_skiped= 0;
2005                 assert(s->pict_type!=I_TYPE);
2006  
2007                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
2008                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2009
2010                 /* if previous was skipped too, then nothing to do !  */
2011                 if (*mbskip_ptr >= age){
2012 //if(s->pict_type!=B_TYPE && s->mb_x==0) printf("\n");
2013 //if(s->pict_type!=B_TYPE) printf("%d%d ", *mbskip_ptr, age);
2014                     if(s->pict_type!=B_TYPE) return;
2015                     if(s->avctx->draw_horiz_band==NULL && *mbskip_ptr > age) return; 
2016                     /* we don't draw complete frames here, so we can't skip */
2017                 }
2018             } else {
2019                 *mbskip_ptr = 0; /* not skipped */
2020             }
2021         }else
2022             s->mb_skiped= 0;
2023
2024         if(s->pict_type==B_TYPE && s->avctx->draw_horiz_band){
2025             dest_y  = s->current_picture.data[0] + mb_x * 16;
2026             dest_cb = s->current_picture.data[1] + mb_x * 8;
2027             dest_cr = s->current_picture.data[2] + mb_x * 8;
2028         }else{
2029             dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
2030             dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2031             dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2032         }
2033
2034         if (s->interlaced_dct) {
2035             dct_linesize = s->linesize * 2;
2036             dct_offset = s->linesize;
2037         } else {
2038             dct_linesize = s->linesize;
2039             dct_offset = s->linesize * 8;
2040         }
2041
2042         if (!s->mb_intra) {
2043             /* motion handling */
2044             /* decoding or more than one mb_type (MC was already done otherwise) */
2045             if((!s->encoding) || (s->mb_type[mb_xy]&(s->mb_type[mb_xy]-1))){
2046                 if ((!s->no_rounding) || s->pict_type==B_TYPE){                
2047                     op_pix = s->dsp.put_pixels_tab;
2048                     op_qpix= s->dsp.put_qpel_pixels_tab;
2049                 }else{
2050                     op_pix = s->dsp.put_no_rnd_pixels_tab;
2051                     op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2052                 }
2053
2054                 if (s->mv_dir & MV_DIR_FORWARD) {
2055                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2056                     op_pix = s->dsp.avg_pixels_tab;
2057                     op_qpix= s->dsp.avg_qpel_pixels_tab;
2058                 }
2059                 if (s->mv_dir & MV_DIR_BACKWARD) {
2060                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2061                 }
2062             }
2063
2064             /* skip dequant / idct if we are really late ;) */
2065             if(s->hurry_up>1) return;
2066
2067             /* add dct residue */
2068             if(s->encoding || !(   s->mpeg2 || s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO 
2069                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
2070                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize);
2071                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2072                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2073                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2074
2075                 if(!(s->flags&CODEC_FLAG_GRAY)){
2076                     add_dequant_dct(s, block[4], 4, dest_cb, s->uvlinesize);
2077                     add_dequant_dct(s, block[5], 5, dest_cr, s->uvlinesize);
2078                 }
2079             } else if(s->codec_id != CODEC_ID_WMV2){
2080                 add_dct(s, block[0], 0, dest_y, dct_linesize);
2081                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2082                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2083                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2084
2085                 if(!(s->flags&CODEC_FLAG_GRAY)){
2086                     add_dct(s, block[4], 4, dest_cb, s->uvlinesize);
2087                     add_dct(s, block[5], 5, dest_cr, s->uvlinesize);
2088                 }
2089             } else{
2090                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
2091             }
2092         } else {
2093             /* dct only in intra block */
2094             if(s->encoding || !(s->mpeg2 || s->codec_id==CODEC_ID_MPEG1VIDEO)){
2095                 put_dct(s, block[0], 0, dest_y, dct_linesize);
2096                 put_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2097                 put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2098                 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2099
2100                 if(!(s->flags&CODEC_FLAG_GRAY)){
2101                     put_dct(s, block[4], 4, dest_cb, s->uvlinesize);
2102                     put_dct(s, block[5], 5, dest_cr, s->uvlinesize);
2103                 }
2104             }else{
2105                 s->idct_put(dest_y                 , dct_linesize, block[0]);
2106                 s->idct_put(dest_y              + 8, dct_linesize, block[1]);
2107                 s->idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
2108                 s->idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
2109
2110                 if(!(s->flags&CODEC_FLAG_GRAY)){
2111                     s->idct_put(dest_cb, s->uvlinesize, block[4]);
2112                     s->idct_put(dest_cr, s->uvlinesize, block[5]);
2113                 }
2114             }
2115         }
2116     }
2117 }
2118
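     /* Heuristic single-coefficient elimination: if a block contains only a
      * few isolated +-1 coefficients, coding them costs more bits than the
      * quality they add, so the whole block is zeroed.  Each +-1 scores
      * tab[run] (run = zeros before it; long runs score 0), any coefficient
      * larger than 1 aborts the elimination, and the block survives only if
      * the total score reaches the threshold.  A negative threshold
      * additionally allows the DC coefficient to be dropped. */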
2119 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
2120 {
2121     static const char tab[64]=
2122         {3,2,2,1,1,1,1,1,
2123          1,1,1,1,1,1,1,1,
2124          1,1,1,1,1,1,1,1,
2125          0,0,0,0,0,0,0,0,
2126          0,0,0,0,0,0,0,0,
2127          0,0,0,0,0,0,0,0,
2128          0,0,0,0,0,0,0,0,
2129          0,0,0,0,0,0,0,0};
2130     int score=0;
2131     int run=0;
2132     int i;
2133     DCTELEM *block= s->block[n];
2134     const int last_index= s->block_last_index[n];
2135     int skip_dc;
2136
2137     if(threshold<0){
2138         skip_dc=0;
2139         threshold= -threshold;
2140     }else
2141         skip_dc=1;
2142
2143     /* are all the coefficients we could set to zero already zero? */
2144     if(last_index<=skip_dc - 1) return;
2145
2146     for(i=0; i<=last_index; i++){
2147         const int j = s->intra_scantable.permutated[i];
2148         const int level = ABS(block[j]);
2149         if(level==1){
2150             if(skip_dc && i==0) continue;
2151             score+= tab[run];
2152             run=0;
2153         }else if(level>1){
2154             return;
2155         }else{
2156             run++;
2157         }
2158     }
2159     if(score >= threshold) return;
2160     for(i=skip_dc; i<=last_index; i++){
2161         const int j = s->intra_scantable.permutated[i];
2162         block[j]=0;
2163     }
2164     if(block[0]) s->block_last_index[n]= 0;
2165     else         s->block_last_index[n]= -1;
2166 }
2167
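     /* Clamp quantized coefficients to the representable range of the target
      * syntax (s->min_qcoeff .. s->max_qcoeff); the intra DC coefficient is
      * skipped since it is coded separately. */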
2168 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
2169 {
2170     int i;
2171     const int maxlevel= s->max_qcoeff;
2172     const int minlevel= s->min_qcoeff;
2173     
2174     if(s->mb_intra){
2175         i=1; //skip clipping of intra dc
2176     }else
2177         i=0;
2178     
2179     for(;i<=last_index; i++){
2180         const int j= s->intra_scantable.permutated[i];
2181         int level = block[j];
2182        
2183         if     (level>maxlevel) level=maxlevel;
2184         else if(level<minlevel) level=minlevel;
2185         block[j]= level;
2186     }
2187 }
2188
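     /* Rescale already-quantized coefficients from quantizer oldq to newq
      * (ROUNDED_DIV(level*oldq, newq)) and re-derive block_last_index.  Used by
      * auto_requantize_coeffs() below, which searches for the smallest newq
      * that brings all coefficients back into the min/max range instead of
      * clipping them. */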
2189 static inline void requantize_coeffs(MpegEncContext *s, DCTELEM block[64], int oldq, int newq, int n)
2190 {
2191     int i;
2192
2193     if(s->mb_intra){
2194         i=1; //skip the intra DC coefficient
2195          //FIXME requantize, note (mpeg1/h263/h263p-aic don't need it,...)
2196     }else
2197         i=0;
2198     
2199     for(;i<=s->block_last_index[n]; i++){
2200         const int j = s->intra_scantable.permutated[i];
2201         int level = block[j];
2202         
2203         block[j]= ROUNDED_DIV(level*oldq, newq);
2204     }
2205
2206     for(i=s->block_last_index[n]; i>=0; i--){
2207         const int j = s->intra_scantable.permutated[i];
2208         if(block[j]) break;
2209     }
2210     s->block_last_index[n]= i;
2211 }
2212
2213 static inline void auto_requantize_coeffs(MpegEncContext *s, DCTELEM block[6][64])
2214 {
2215     int i,n, newq;
2216     const int maxlevel= s->max_qcoeff;
2217     const int minlevel= s->min_qcoeff;
2218     int largest=0, smallest=0;
2219
2220     assert(s->adaptive_quant);
2221     
2222     for(n=0; n<6; n++){
2223         if(s->mb_intra){
2224             i=1; //skip the intra DC coefficient
2225              //FIXME requantize, note (mpeg1/h263/h263p-aic don't need it,...)
2226         }else
2227             i=0;
2228
2229         for(;i<=s->block_last_index[n]; i++){
2230             const int j = s->intra_scantable.permutated[i];
2231             int level = block[n][j];
2232             if(largest  < level) largest = level;
2233             if(smallest > level) smallest= level;
2234         }
2235     }
2236     
2237     for(newq=s->qscale+1; newq<32; newq++){
2238         if(   ROUNDED_DIV(smallest*s->qscale, newq) >= minlevel
2239            && ROUNDED_DIV(largest *s->qscale, newq) <= maxlevel) 
2240             break;
2241     }
2242         
2243     if(s->out_format==FMT_H263){
2244         /* H.263-like formats cannot easily change qscale by more than 2 */
2245         if(s->avctx->qmin + 2 < newq)
2246             newq= s->avctx->qmin + 2;
2247     }
2248
2249     for(n=0; n<6; n++){
2250         requantize_coeffs(s, block[n], s->qscale, newq, n);
2251         clip_coeffs(s, block[n], s->block_last_index[n]);
2252     }
2253      
2254     s->dquant+= newq - s->qscale;
2255     s->qscale= newq;
2256 }
2257 #if 0
2258 static int pix_vcmp16x8(UINT8 *s, int stride){ //FIXME move to dsputil & optimize
2259     int score=0;
2260     int x,y;
2261     
2262     for(y=0; y<7; y++){
2263         for(x=0; x<16; x+=4){
2264             score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride]) 
2265                    +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
2266         }
2267         s+= stride;
2268     }
2269     
2270     return score;
2271 }
2272
2273 static int pix_diff_vcmp16x8(UINT8 *s1, UINT8*s2, int stride){ //FIXME move to dsputil & optimize
2274     int score=0;
2275     int x,y;
2276     
2277     for(y=0; y<7; y++){
2278         for(x=0; x<16; x++){
2279             score+= ABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2280         }
2281         s1+= stride;
2282         s2+= stride;
2283     }
2284     
2285     return score;
2286 }
2287 #else
2288 #define SQ(a) ((a)*(a))
2289
2290 static int pix_vcmp16x8(UINT8 *s, int stride){ //FIXME move to dsputil & optimize
2291     int score=0;
2292     int x,y;
2293     
2294     for(y=0; y<7; y++){
2295         for(x=0; x<16; x+=4){
2296             score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) 
2297                    +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
2298         }
2299         s+= stride;
2300     }
2301     
2302     return score;
2303 }
2304
2305 static int pix_diff_vcmp16x8(UINT8 *s1, UINT8*s2, int stride){ //FIXME move to dsputil & optimize
2306     int score=0;
2307     int x,y;
2308     
2309     for(y=0; y<7; y++){
2310         for(x=0; x<16; x++){
2311             score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2312         }
2313         s1+= stride;
2314         s2+= stride;
2315     }
2316     
2317     return score;
2318 }
2319
2320 #endif
2321
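     /* Hand a finished stripe (up to 16 lines) to the application through the
      * draw_horiz_band callback.  B-frames and low-delay streams are displayed
      * from current_picture; otherwise the previous picture is shown,
      * reflecting the one-frame reordering delay. */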
2322 void ff_draw_horiz_band(MpegEncContext *s){
2323     if (    s->avctx->draw_horiz_band 
2324         && (s->last_picture.data[0] || s->low_delay) ) {
2325         UINT8 *src_ptr[3];
2326         int y, h, offset;
2327         y = s->mb_y * 16;
2328         h = s->height - y;
2329         if (h > 16)
2330             h = 16;
2331
2332         if(s->pict_type==B_TYPE)
2333             offset = 0;
2334         else
2335             offset = y * s->linesize;
2336
2337         if(s->pict_type==B_TYPE || s->low_delay){
2338             src_ptr[0] = s->current_picture.data[0] + offset;
2339             src_ptr[1] = s->current_picture.data[1] + (offset >> 2);
2340             src_ptr[2] = s->current_picture.data[2] + (offset >> 2);
2341         } else {
2342             src_ptr[0] = s->last_picture.data[0] + offset;
2343             src_ptr[1] = s->last_picture.data[1] + (offset >> 2);
2344             src_ptr[2] = s->last_picture.data[2] + (offset >> 2);
2345         }
2346         emms_c();
2347
2348         s->avctx->draw_horiz_band(s->avctx, src_ptr, s->linesize,
2349                                y, s->width, h);
2350     }
2351 }
2352
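     /* Encode one macroblock: apply the adaptive-quant dquant, fetch the
      * source pixels (intra) or the motion-compensated difference (inter),
      * optionally switch to interlaced DCT when the field-split score wins,
      * DCT+quantize the six 8x8 blocks with clipping/coefficient elimination,
      * and finally emit the bits with the codec-specific *_encode_mb()
      * routine. */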
2353 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2354 {
2355     const int mb_x= s->mb_x;
2356     const int mb_y= s->mb_y;
2357     int i;
2358     int skip_dct[6];
2359     int dct_offset   = s->linesize*8; //default for progressive frames
2360     
2361     for(i=0; i<6; i++) skip_dct[i]=0;
2362     
2363     if(s->adaptive_quant){
2364         s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_width] - s->qscale;
2365
2366         if(s->out_format==FMT_H263){
2367             if     (s->dquant> 2) s->dquant= 2;
2368             else if(s->dquant<-2) s->dquant=-2;
2369         }
2370             
2371         if(s->codec_id==CODEC_ID_MPEG4){        
2372             if(!s->mb_intra){
2373                 assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
2374
2375                 if(s->mv_dir&MV_DIRECT)
2376                     s->dquant=0;
2377             }
2378         }
2379         s->qscale+= s->dquant;
2380         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
2381         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
2382     }
2383
2384     if (s->mb_intra) {
2385         UINT8 *ptr;
2386         int wrap_y;
2387         int emu=0;
2388
2389         wrap_y = s->linesize;
2390         ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2391
2392         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2393             ff_emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2394             ptr= s->edge_emu_buffer;
2395             emu=1;
2396         }
2397         
2398         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2399             int progressive_score, interlaced_score;
2400             
2401             progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
2402             interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
2403             
2404             if(progressive_score > interlaced_score + 100){
2405                 s->interlaced_dct=1;
2406             
2407                 dct_offset= wrap_y;
2408                 wrap_y<<=1;
2409             }else
2410                 s->interlaced_dct=0;
2411         }
2412         
2413         s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
2414         s->dsp.get_pixels(s->block[1], ptr              + 8, wrap_y);
2415         s->dsp.get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
2416         s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
2417
2418         if(s->flags&CODEC_FLAG_GRAY){
2419             skip_dct[4]= 1;
2420             skip_dct[5]= 1;
2421         }else{
2422             int wrap_c = s->uvlinesize;
2423             ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2424             if(emu){
2425                 ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2426                 ptr= s->edge_emu_buffer;
2427             }
2428             s->dsp.get_pixels(s->block[4], ptr, wrap_c);
2429
2430             ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2431             if(emu){
2432                 ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2433                 ptr= s->edge_emu_buffer;
2434             }
2435             s->dsp.get_pixels(s->block[5], ptr, wrap_c);
2436         }
2437     }else{
2438         op_pixels_func (*op_pix)[4];
2439         qpel_mc_func (*op_qpix)[16];
2440         UINT8 *dest_y, *dest_cb, *dest_cr;
2441         UINT8 *ptr_y, *ptr_cb, *ptr_cr;
2442         int wrap_y, wrap_c;
2443         int emu=0;
2444
2445         dest_y  = s->current_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
2446         dest_cb = s->current_picture.data[1] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
2447         dest_cr = s->current_picture.data[2] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
2448         wrap_y = s->linesize;
2449         wrap_c = s->uvlinesize;
2450         ptr_y  = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2451         ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2452         ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2453
2454         if ((!s->no_rounding) || s->pict_type==B_TYPE){
2455             op_pix = s->dsp.put_pixels_tab;
2456             op_qpix= s->dsp.put_qpel_pixels_tab;
2457         }else{
2458             op_pix = s->dsp.put_no_rnd_pixels_tab;
2459             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2460         }
2461
2462         if (s->mv_dir & MV_DIR_FORWARD) {
2463             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2464             op_pix = s->dsp.avg_pixels_tab;
2465             op_qpix= s->dsp.avg_qpel_pixels_tab;
2466         }
2467         if (s->mv_dir & MV_DIR_BACKWARD) {
2468             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2469         }
2470
2471         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2472             ff_emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2473             ptr_y= s->edge_emu_buffer;
2474             emu=1;
2475         }
2476         
2477         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2478             int progressive_score, interlaced_score;
2479             
2480             progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  ) 
2481                              + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
2482             interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
2483                              + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
2484             
2485             if(progressive_score > interlaced_score + 600){
2486                 s->interlaced_dct=1;
2487             
2488                 dct_offset= wrap_y;
2489                 wrap_y<<=1;
2490             }else
2491                 s->interlaced_dct=0;
2492         }
2493         
2494         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
2495         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
2496         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
2497         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
2498         
2499         if(s->flags&CODEC_FLAG_GRAY){
2500             skip_dct[4]= 1;
2501             skip_dct[5]= 1;
2502         }else{
2503             if(emu){
2504                 ff_emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2505                 ptr_cb= s->edge_emu_buffer;
2506             }
2507             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2508             if(emu){
2509                 ff_emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2510                 ptr_cr= s->edge_emu_buffer;
2511             }
2512             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2513         }
2514         /* pre-quantization: skip the DCT for blocks with a very small residual */
2515         if(s->current_picture.mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){
2516             //FIXME optimize
2517             if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
2518             if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
2519             if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
2520             if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
2521             if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_c) < 20*s->qscale) skip_dct[4]= 1;
2522             if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_c) < 20*s->qscale) skip_dct[5]= 1;
2523 #if 0
2524 {
2525  static int stat[7];
2526  int num=0;
2527  for(i=0; i<6; i++)
2528   if(skip_dct[i]) num++;
2529  stat[num]++;
2530  
2531  if(s->mb_x==0 && s->mb_y==0){
2532   for(i=0; i<7; i++){
2533    printf("%6d %1d\n", stat[i], i);
2534   }
2535  }
2536 }
2537 #endif
2538         }
2539
2540     }
2541             
2542 #if 0
2543             {
2544                 float adap_parm;
2545                 
2546                 adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) /
2547                             ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
2548             
2549                 printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", 
2550                         (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P', 
2551                         s->qscale, adap_parm, s->qscale*adap_parm,
2552                         s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var);
2553             }
2554 #endif
2555     /* DCT & quantize */
2556     if(s->out_format==FMT_MJPEG){
2557         for(i=0;i<6;i++) {
2558             int overflow;
2559             s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, 8, &overflow);
2560             if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
2561         }
2562     }else{
2563         for(i=0;i<6;i++) {
2564             if(!skip_dct[i]){
2565                 int overflow;
2566                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2567             // FIXME we could decide to change the quantizer instead of clipping
2568             // JS: I don't think that would be a good idea, it could lower quality instead
2569             //     of improving it. Just INTRADC clipping deserves changes in the quantizer
2570                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
2571             }else
2572                 s->block_last_index[i]= -1;
2573         }
2574         if(s->luma_elim_threshold && !s->mb_intra)
2575             for(i=0; i<4; i++)
2576                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2577         if(s->chroma_elim_threshold && !s->mb_intra)
2578             for(i=4; i<6; i++)
2579                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2580     }
2581
2582     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
2583         s->block_last_index[4]=
2584         s->block_last_index[5]= 0;
2585         s->block[4][0]=
2586         s->block[5][0]= 128;
2587     }
2588
2589 #ifdef CONFIG_ENCODERS
2590     /* huffman encode */
2591     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2592     case CODEC_ID_MPEG1VIDEO:
2593         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
2594     case CODEC_ID_MPEG4:
2595         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
2596     case CODEC_ID_MSMPEG4V2:
2597     case CODEC_ID_MSMPEG4V3:
2598     case CODEC_ID_WMV1:
2599         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
2600     case CODEC_ID_WMV2:
2601          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
2602     case CODEC_ID_MJPEG:
2603         mjpeg_encode_mb(s, s->block); break;
2604     case CODEC_ID_H263:
2605     case CODEC_ID_H263P:
2606     case CODEC_ID_RV10:
2607         h263_encode_mb(s, s->block, motion_x, motion_y); break;
2608     default:
2609         assert(0);
2610     }
2611 #endif
2612 }
2613
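     /* Append 'length' bits from src to the PutBitContext, 16 bits at a time
      * with the remainder last (big-endian source order); used e.g. when
      * merging the scratch bit buffers of encode_mb_hq() or the data
      * partitions back into the main bitstream. */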
2614 void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length)
2615 {
2616     int bytes= length>>4;
2617     int bits= length&15;
2618     int i;
2619
2620     if(length==0) return;
2621
2622     for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i]));
2623     put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits));
2624 }
2625
2626 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2627     int i;
2628
2629     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2630
2631     /* mpeg1 */
2632     d->mb_incr= s->mb_incr;
2633     for(i=0; i<3; i++)
2634         d->last_dc[i]= s->last_dc[i];
2635     
2636     /* statistics */
2637     d->mv_bits= s->mv_bits;
2638     d->i_tex_bits= s->i_tex_bits;
2639     d->p_tex_bits= s->p_tex_bits;
2640     d->i_count= s->i_count;
2641     d->f_count= s->f_count;
2642     d->b_count= s->b_count;
2643     d->skip_count= s->skip_count;
2644     d->misc_bits= s->misc_bits;
2645     d->last_bits= 0;
2646
2647     d->mb_skiped= s->mb_skiped;
2648     d->qscale= s->qscale;
2649 }
2650
2651 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2652     int i;
2653
2654     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
2655     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2656     
2657     /* mpeg1 */
2658     d->mb_incr= s->mb_incr;
2659     for(i=0; i<3; i++)
2660         d->last_dc[i]= s->last_dc[i];
2661     
2662     /* statistics */
2663     d->mv_bits= s->mv_bits;
2664     d->i_tex_bits= s->i_tex_bits;
2665     d->p_tex_bits= s->p_tex_bits;
2666     d->i_count= s->i_count;
2667     d->f_count= s->f_count;
2668     d->b_count= s->b_count;
2669     d->skip_count= s->skip_count;
2670     d->misc_bits= s->misc_bits;
2671
2672     d->mb_intra= s->mb_intra;
2673     d->mb_skiped= s->mb_skiped;
2674     d->mv_type= s->mv_type;
2675     d->mv_dir= s->mv_dir;
2676     d->pb= s->pb;
2677     if(s->data_partitioning){
2678         d->pb2= s->pb2;
2679         d->tex_pb= s->tex_pb;
2680     }
2681     d->block= s->block;
2682     for(i=0; i<6; i++)
2683         d->block_last_index[i]= s->block_last_index[i];
2684     d->interlaced_dct= s->interlaced_dct;
2685     d->qscale= s->qscale;
2686 }
2687
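     /* Trial-encode one macroblock as the given type: restore the saved
      * context, write into one of two scratch bit buffers, and if the result
      * is the cheapest so far (fewest bits) keep it in 'best' and flip
      * *next_block so the other buffer is used for the next candidate. */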
2688 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
2689                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2690                            int *dmin, int *next_block, int motion_x, int motion_y)
2691 {
2692     int bits_count;
2693     
2694     copy_context_before_encode(s, backup, type);
2695
2696     s->block= s->blocks[*next_block];
2697     s->pb= pb[*next_block];
2698     if(s->data_partitioning){
2699         s->pb2   = pb2   [*next_block];
2700         s->tex_pb= tex_pb[*next_block];
2701     }
2702
2703     encode_mb(s, motion_x, motion_y);
2704
2705     bits_count= get_bit_count(&s->pb);
2706     if(s->data_partitioning){
2707         bits_count+= get_bit_count(&s->pb2);
2708         bits_count+= get_bit_count(&s->tex_pb);
2709     }
2710
2711     if(bits_count<*dmin){
2712         *dmin= bits_count;
2713         *next_block^=1;
2714
2715         copy_context_after_encode(best, s, type);
2716     }
2717 }
2718                 
2719 static inline int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2720     uint32_t *sq = squareTbl + 256;
2721     int acc=0;
2722     int x,y;
2723     
2724     if(w==16 && h==16) 
2725         return s->dsp.sse[0](NULL, src1, src2, stride);
2726     else if(w==8 && h==8)
2727         return s->dsp.sse[1](NULL, src1, src2, stride);
2728     
2729     for(y=0; y<h; y++){
2730         for(x=0; x<w; x++){
2731             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2732         } 
2733     }
2734     
2735     assert(acc>=0);
2736     
2737     return acc;
2738 }
2739
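     /* Encode one picture: run (pre-)motion estimation or the intra complexity
      * pass, fix up MV ranges and possibly promote a scene change to an
      * I-frame, pick the frame quantizer via the rate control, write the
      * picture header and then loop over all macroblocks of the frame. */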
2740 static void encode_picture(MpegEncContext *s, int picture_number)
2741 {
2742     int mb_x, mb_y, pdif = 0;
2743     int i;
2744     int bits;
2745     MpegEncContext best_s, backup_s;
2746     UINT8 bit_buf[2][3000];
2747     UINT8 bit_buf2[2][3000];
2748     UINT8 bit_buf_tex[2][3000];
2749     PutBitContext pb[2], pb2[2], tex_pb[2];
2750
2751     for(i=0; i<2; i++){
2752         init_put_bits(&pb    [i], bit_buf    [i], 3000, NULL, NULL);
2753         init_put_bits(&pb2   [i], bit_buf2   [i], 3000, NULL, NULL);
2754         init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000, NULL, NULL);
2755     }
2756
2757     s->picture_number = picture_number;
2758
2759     s->block_wrap[0]=
2760     s->block_wrap[1]=
2761     s->block_wrap[2]=
2762     s->block_wrap[3]= s->mb_width*2 + 2;
2763     s->block_wrap[4]=
2764     s->block_wrap[5]= s->mb_width + 2;
2765     
2766     /* Reset the average MB variance */
2767     s->current_picture.mb_var_sum = 0;
2768     s->current_picture.mc_mb_var_sum = 0;
2769
2770     /* we need to initialize some time vars before we can encode b-frames */
2771     if (s->h263_pred && !s->h263_msmpeg4)
2772         ff_set_mpeg4_time(s, s->picture_number); 
2773
2774     s->scene_change_score=0;
2775     
2776     s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME rate distortion
2777     
2778     if(s->msmpeg4_version){
2779         if(s->pict_type==I_TYPE)
2780             s->no_rounding=1;
2781         else if(s->flipflop_rounding)
2782             s->no_rounding ^= 1;          
2783     }else if(s->out_format == FMT_H263){
2784         if(s->pict_type==I_TYPE)
2785             s->no_rounding=0;
2786         else if(s->pict_type!=B_TYPE)
2787             s->no_rounding ^= 1;          
2788     }
2789
2790     /* Estimate motion for every MB */
2791     if(s->pict_type != I_TYPE){
2792
2793         if(s->pict_type != B_TYPE){
2794             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
2795                 for(mb_y=s->mb_height-1; mb_y >=0 ; mb_y--) {
2796                     for(mb_x=s->mb_width-1; mb_x >=0 ; mb_x--) {
2797                         s->mb_x = mb_x;
2798                         s->mb_y = mb_y;
2799                         ff_pre_estimate_p_frame_motion(s, mb_x, mb_y);
2800                     }
2801                 }
2802             }
2803         }
2804
2805         for(mb_y=0; mb_y < s->mb_height; mb_y++) {
2806             s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
2807             s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
2808             s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
2809             s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
2810             for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2811                 s->mb_x = mb_x;
2812                 s->mb_y = mb_y;
2813                 s->block_index[0]+=2;
2814                 s->block_index[1]+=2;
2815                 s->block_index[2]+=2;
2816                 s->block_index[3]+=2;
2817
2818                 /* compute motion vector & mb_type and store in context */
2819                 if(s->pict_type==B_TYPE)
2820                     ff_estimate_b_frame_motion(s, mb_x, mb_y);
2821                 else
2822                     ff_estimate_p_frame_motion(s, mb_x, mb_y);
2823             }
2824         }
2825     }else /* if(s->pict_type == I_TYPE) */{
2826         /* I-Frame */
2827         //FIXME do we need to zero them?
2828         memset(s->motion_val[0], 0, sizeof(INT16)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
2829         memset(s->p_mv_table   , 0, sizeof(INT16)*(s->mb_width+2)*(s->mb_height+2)*2);
2830         memset(s->mb_type      , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
2831         
2832         if(!s->fixed_qscale){
2833             /* finding spatial complexity for I-frame rate control */
2834             for(mb_y=0; mb_y < s->mb_height; mb_y++) {
2835                 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2836                     int xx = mb_x * 16;
2837                     int yy = mb_y * 16;
2838                     uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
2839                     int varc;
2840                     int sum = s->dsp.pix_sum(pix, s->linesize);
2841     
2842                     varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
2843
2844                     s->current_picture.mb_var [s->mb_width * mb_y + mb_x] = varc;
2845                     s->current_picture.mb_mean[s->mb_width * mb_y + mb_x] = (sum+128)>>8;
2846                     s->current_picture.mb_var_sum    += varc;
2847                 }
2848             }
2849         }
2850     }
2851     emms_c();
2852
2853     if(s->scene_change_score > 0 && s->pict_type == P_TYPE){
2854         s->pict_type= I_TYPE;
2855         memset(s->mb_type   , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
2856 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
2857     }
2858
2859     if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
2860         s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
2861         ff_fix_long_p_mvs(s);
         }
2862     if(s->pict_type==B_TYPE){
2863         s->f_code= ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
2864         s->b_code= ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
2865
2866         ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
2867         ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
2868         ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
2869         ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
2870     }
2871     
2872     if (s->fixed_qscale) 
2873         s->frame_qscale = s->current_picture.quality;
2874     else
2875         s->frame_qscale = ff_rate_estimate_qscale(s);
2876
2877     if(s->adaptive_quant){
2878         switch(s->codec_id){
2879         case CODEC_ID_MPEG4:
2880             ff_clean_mpeg4_qscales(s);
2881             break;
2882         case CODEC_ID_H263:
2883         case CODEC_ID_H263P:
2884             ff_clean_h263_qscales(s);
2885             break;
2886         }
2887
2888         s->qscale= s->current_picture.qscale_table[0];
2889     }else
2890         s->qscale= (int)(s->frame_qscale + 0.5);
2891         
2892     if (s->out_format == FMT_MJPEG) {
2893         /* for mjpeg, we do include qscale in the matrix */
2894         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
2895         for(i=1;i<64;i++){
2896             int j= s->idct_permutation[i];
2897
2898             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
2899         }
2900         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, 
2901                        s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8);
2902     }
2903     
2904     //FIXME var duplication
2905     s->current_picture.key_frame= s->pict_type == I_TYPE;
2906     s->current_picture.pict_type= s->pict_type;
2907
2908     if(s->current_picture.key_frame)
2909         s->picture_in_gop_number=0;
2910
2911     s->last_bits= get_bit_count(&s->pb);
2912     switch(s->out_format) {
2913     case FMT_MJPEG:
2914         mjpeg_picture_header(s);
2915         break;
2916     case FMT_H263:
2917         if (s->codec_id == CODEC_ID_WMV2) 
2918             ff_wmv2_encode_picture_header(s, picture_number);
2919         else if (s->h263_msmpeg4) 
2920             msmpeg4_encode_picture_header(s, picture_number);
2921         else if (s->h263_pred)
2922             mpeg4_encode_picture_header(s, picture_number);
2923         else if (s->h263_rv10) 
2924             rv10_encode_picture_header(s, picture_number);
2925         else
2926             h263_encode_picture_header(s, picture_number);
2927         break;
2928     case FMT_MPEG1:
2929         mpeg1_encode_picture_header(s, picture_number);
2930         break;
2931     }
2932     bits= get_bit_count(&s->pb);
2933     s->header_bits= bits - s->last_bits;
2934     s->last_bits= bits;
2935     s->mv_bits=0;
2936     s->misc_bits=0;
2937     s->i_tex_bits=0;
2938     s->p_tex_bits=0;
2939     s->i_count=0;
2940     s->f_count=0;
2941     s->b_count=0;
2942     s->skip_count=0;
2943
2944     for(i=0; i<3; i++){
2945         /* init last dc values */
2946         /* note: quant matrix value (8) is implied here */
2947         s->last_dc[i] = 128;
2948         
2949         s->current_picture.error[i] = 0;
2950     }
2951     s->mb_incr = 1;
2952     s->last_mv[0][0][0] = 0;
2953     s->last_mv[0][0][1] = 0;
2954
2955     if (s->codec_id==CODEC_ID_H263 || s->codec_id==CODEC_ID_H263P)
2956         s->gob_index = ff_h263_get_gob_height(s);
2957
2958     if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
2959         ff_mpeg4_init_partitions(s);
2960
2961     s->resync_mb_x=0;
2962     s->resync_mb_y=0;
2963     s->first_slice_line = 1;
2964     s->ptr_lastgob = s->pb.buf;
2965     s->ptr_last_mb_line = s->pb.buf;
2966     for(mb_y=0; mb_y < s->mb_height; mb_y++) {
2967         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
2968         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
2969         
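             /* block_index[0..3] point at the four luma blocks of this MB row and
                block_index[4..5] at the two chroma blocks; the block_wrap[] strides
                include a one-block border, hence the +1/+2 offsets */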
2970         s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
2971         s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
2972         s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
2973         s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
2974         s->block_index[4]= s->block_wrap[4]*(mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2);
2975         s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
2976         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2977             const int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
2978             const int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
2979 //            int d;
2980             int dmin=10000000;
2981
2982             s->mb_x = mb_x;
2983             s->mb_y = mb_y;
2984             s->block_index[0]+=2;
2985             s->block_index[1]+=2;
2986             s->block_index[2]+=2;
2987             s->block_index[3]+=2;
2988             s->block_index[4]++;
2989             s->block_index[5]++;
2990
2991             /* write gob / video packet header  */
2992             if(s->rtp_mode){
2993                 int current_packet_size, is_gob_start;
2994                 
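                 /* packetized (RTP) mode: when the bits written since the last
                    resync point approach rtp_payload_size, a video packet header
                    (MPEG-4) or a GOB header (H.263 and friends) is emitted so the
                    next packet can start at a resync marker */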
2995                 current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
2996                 is_gob_start=0;
2997                 
2998                 if(s->codec_id==CODEC_ID_MPEG4){
2999                     if(current_packet_size + s->mb_line_avgsize/s->mb_width >= s->rtp_payload_size
3000                        && s->mb_y + s->mb_x>0){
3001
3002                         if(s->partitioned_frame){
3003                             ff_mpeg4_merge_partitions(s);
3004                             ff_mpeg4_init_partitions(s);
3005                         }
3006                         ff_mpeg4_encode_video_packet_header(s);
3007
3008                         if(s->flags&CODEC_FLAG_PASS1){
3009                             int bits= get_bit_count(&s->pb);
3010                             s->misc_bits+= bits - s->last_bits;
3011                             s->last_bits= bits;
3012                         }
3013                         ff_mpeg4_clean_buffers(s);
3014                         is_gob_start=1;
3015                     }
3016                 }else{
3017                     if(current_packet_size + s->mb_line_avgsize*s->gob_index >= s->rtp_payload_size
3018                        && s->mb_x==0 && s->mb_y>0 && s->mb_y%s->gob_index==0){
3019                        
3020                         h263_encode_gob_header(s, mb_y);                       
3021                         is_gob_start=1;
3022                     }
3023                 }
3024
3025                 if(is_gob_start){
3026                     s->ptr_lastgob = pbBufPtr(&s->pb);
3027                     s->first_slice_line=1;
3028                     s->resync_mb_x=mb_x;
3029                     s->resync_mb_y=mb_y;
3030                 }
3031             }
3032
3033             if(  (s->resync_mb_x   == s->mb_x)
3034                && s->resync_mb_y+1 == s->mb_y){
3035                 s->first_slice_line=0; 
3036             }
3037
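             /* mb_type & (mb_type-1) is nonzero iff more than one candidate MB
                type bit is set; each candidate is then encoded into a scratch
                bitstream via encode_mb_hq() and the cheapest (lowest dmin)
                result is copied back afterwards */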
3038             if(mb_type & (mb_type-1)){ // more than 1 MB type possible
3039                 int next_block=0;
3040                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3041
3042                 copy_context_before_encode(&backup_s, s, -1);
3043                 backup_s.pb= s->pb;
3044                 best_s.data_partitioning= s->data_partitioning;
3045                 best_s.partitioned_frame= s->partitioned_frame;
3046                 if(s->data_partitioning){
3047                     backup_s.pb2= s->pb2;
3048                     backup_s.tex_pb= s->tex_pb;
3049                 }
3050
3051                 if(mb_type&MB_TYPE_INTER){
3052                     s->mv_dir = MV_DIR_FORWARD;
3053                     s->mv_type = MV_TYPE_16X16;
3054                     s->mb_intra= 0;
3055                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3056                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3057                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb, 
3058                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3059                 }
3060                 if(mb_type&MB_TYPE_INTER4V){                 
3061                     s->mv_dir = MV_DIR_FORWARD;
3062                     s->mv_type = MV_TYPE_8X8;
3063                     s->mb_intra= 0;
3064                     for(i=0; i<4; i++){
3065                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3066                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3067                     }
3068                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, 
3069                                  &dmin, &next_block, 0, 0);
3070                 }
3071                 if(mb_type&MB_TYPE_FORWARD){
3072                     s->mv_dir = MV_DIR_FORWARD;
3073                     s->mv_type = MV_TYPE_16X16;
3074                     s->mb_intra= 0;
3075                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3076                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3077                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, 
3078                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3079                 }
3080                 if(mb_type&MB_TYPE_BACKWARD){
3081                     s->mv_dir = MV_DIR_BACKWARD;
3082                     s->mv_type = MV_TYPE_16X16;
3083                     s->mb_intra= 0;
3084                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3085                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3086                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
3087                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3088                 }
3089                 if(mb_type&MB_TYPE_BIDIR){
3090                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3091                     s->mv_type = MV_TYPE_16X16;
3092                     s->mb_intra= 0;
3093                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3094                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3095                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3096                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3097                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, 
3098                                  &dmin, &next_block, 0, 0);
3099                 }
3100                 if(mb_type&MB_TYPE_DIRECT){
3101                     int mx= s->b_direct_mv_table[xy][0];
3102                     int my= s->b_direct_mv_table[xy][1];
3103                     
3104                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3105                     s->mb_intra= 0;
3106                     ff_mpeg4_set_direct_mv(s, mx, my);
3107                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, 
3108                                  &dmin, &next_block, mx, my);
3109                 }
3110                 if(mb_type&MB_TYPE_INTRA){
3111                     s->mv_dir = MV_DIR_FORWARD;
3112                     s->mv_type = MV_TYPE_16X16;
3113                     s->mb_intra= 1;
3114                     s->mv[0][0][0] = 0;
3115                     s->mv[0][0][1] = 0;
3116                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, 
3117                                  &dmin, &next_block, 0, 0);
3118                     /* force cleaning of the AC/DC prediction state if needed ... */
3119                     if(s->h263_pred || s->h263_aic)
3120                         s->mbintra_table[mb_x + mb_y*s->mb_width]=1;
3121                 }
3122                 copy_context_after_encode(s, &best_s, -1);
3123                 
3124                 pb_bits_count= get_bit_count(&s->pb);
3125                 flush_put_bits(&s->pb);
3126                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3127                 s->pb= backup_s.pb;
3128                 
3129                 if(s->data_partitioning){
3130                     pb2_bits_count= get_bit_count(&s->pb2);
3131                     flush_put_bits(&s->pb2);
3132                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3133                     s->pb2= backup_s.pb2;
3134                     
3135                     tex_pb_bits_count= get_bit_count(&s->tex_pb);
3136                     flush_put_bits(&s->tex_pb);
3137                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3138                     s->tex_pb= backup_s.tex_pb;
3139                 }
3140                 s->last_bits= get_bit_count(&s->pb);
3141             } else {
3142                 int motion_x, motion_y;
3143                 s->mv_type=MV_TYPE_16X16;
3144                 // only one MB-Type possible
3145                 switch(mb_type){
3146                 case MB_TYPE_INTRA:
3147                     s->mv_dir = MV_DIR_FORWARD;
3148                     s->mb_intra= 1;
3149                     motion_x= s->mv[0][0][0] = 0;
3150                     motion_y= s->mv[0][0][1] = 0;
3151                     break;
3152                 case MB_TYPE_INTER:
3153                     s->mv_dir = MV_DIR_FORWARD;
3154                     s->mb_intra= 0;
3155                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3156                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3157                     break;
3158                 case MB_TYPE_INTER4V:
3159                     s->mv_dir = MV_DIR_FORWARD;
3160                     s->mv_type = MV_TYPE_8X8;
3161                     s->mb_intra= 0;
3162                     for(i=0; i<4; i++){
3163                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3164                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3165                     }
3166                     motion_x= motion_y= 0;
3167                     break;
3168                 case MB_TYPE_DIRECT:
3169                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3170                     s->mb_intra= 0;
3171                     motion_x=s->b_direct_mv_table[xy][0];
3172                     motion_y=s->b_direct_mv_table[xy][1];
3173                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3174                     break;
3175                 case MB_TYPE_BIDIR:
3176                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3177                     s->mb_intra= 0;
3178                     motion_x=0;
3179                     motion_y=0;
3180                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3181                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3182                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3183                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3184                     break;
3185                 case MB_TYPE_BACKWARD:
3186                     s->mv_dir = MV_DIR_BACKWARD;
3187                     s->mb_intra= 0;
3188                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3189                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3190                     break;
3191                 case MB_TYPE_FORWARD:
3192                     s->mv_dir = MV_DIR_FORWARD;
3193                     s->mb_intra= 0;
3194                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3195                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3196 //                    printf(" %d %d ", motion_x, motion_y);
3197                     break;
3198                 default:
3199                     motion_x=motion_y=0; //gcc warning fix
3200                     printf("illegal MB type\n");
3201                 }
3202                 encode_mb(s, motion_x, motion_y);
3203             }
3204             /* clear the MV table entry for intra MBs in I/P/S frames, since direct mode in B frames reads it */
3205             if(s->mb_intra /* && I,P,S_TYPE */){
3206                 s->p_mv_table[xy][0]=0;
3207                 s->p_mv_table[xy][1]=0;
3208             }
3209
3210             MPV_decode_mb(s, s->block);
3211             
3212             if(s->flags&CODEC_FLAG_PSNR){
3213                 int w= 16;
3214                 int h= 16;
3215
3216                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3217                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3218
3219                 s->current_picture.error[0] += sse(
3220                     s,
3221                     s->new_picture    .data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3222                     s->current_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3223                     w, h, s->linesize);
3224                 s->current_picture.error[1] += sse(
3225                     s,
3226                     s->new_picture    .data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3227                     s->current_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3228                     w>>1, h>>1, s->uvlinesize);
3229                 s->current_picture.error[2] += sse(
3230                     s,
3231                     s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3232                     s->current_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3233                     w>>1, h>>1, s->uvlinesize);
3234             }
3235 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_width, get_bit_count(&s->pb));
3236         }
3237
3238
3239         /* Obtain average mb_row size for RTP */
3240         if (s->rtp_mode) {
3241             if (mb_y==0)
3242                 s->mb_line_avgsize = pbBufPtr(&s->pb) - s->ptr_last_mb_line;
3243             else {    
3244                 s->mb_line_avgsize = (s->mb_line_avgsize + pbBufPtr(&s->pb) - s->ptr_last_mb_line) >> 1;
3245             }
3246             s->ptr_last_mb_line = pbBufPtr(&s->pb);
3247         }
3248     }
3249     emms_c();
3250
3251     if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
3252         ff_mpeg4_merge_partitions(s);
3253
3254     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
3255         msmpeg4_encode_ext_header(s);
3256
3257     if(s->codec_id==CODEC_ID_MPEG4) 
3258         ff_mpeg4_stuffing(&s->pb);
3259
3260     //if (s->gob_number)
3261     //    fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
3262     
3263     /* Send the last GOB if RTP */    
3264     if (s->rtp_mode) {
3265         flush_put_bits(&s->pb);
3266         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
3267         /* Call the RTP callback to send the last GOB */
3268         if (s->rtp_callback)
3269             s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number);
3270         s->ptr_lastgob = pbBufPtr(&s->pb);
3271         //fprintf(stderr,"\nGOB: %2d size: %d (last)", s->gob_number, pdif);
3272     }
3273 }
3274
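     /* Rate-distortion optimized ("trellis") quantization: for every scan
        position the candidate quantized levels are evaluated and a dynamic
        programming search over run lengths keeps the cheapest
        distortion + lambda*bits path (score_tab/run_tab/level_tab); the block
        is then rebuilt by back-tracking from the best last coefficient. */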
3275 static int dct_quantize_trellis_c(MpegEncContext *s, 
3276                         DCTELEM *block, int n,
3277                         int qscale, int *overflow){
3278     const int *qmat;
3279     const UINT8 *scantable= s->intra_scantable.scantable;
3280     int max=0;
3281     unsigned int threshold1, threshold2;
3282     int bias=0;
3283     int run_tab[65];
3284     int level_tab[65];
3285     int score_tab[65];
3286     int last_run=0;
3287     int last_level=0;
3288     int last_score= 0;
3289     int last_i= 0;
3290     int coeff[3][64];
3291     int coeff_count[64];
3292     int lambda, qmul, qadd, start_i, last_non_zero, i;
3293     const int esc_length= s->ac_esc_length;
3294     uint8_t * length;
3295     uint8_t * last_length;
3296     int score_limit=0;
3297     int left_limit= 0;
3298         
3299     s->fdct (block);
3300
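     /* H.263-style reconstruction constants, pre-scaled by 8 so that the
        dequantized levels can be compared directly against the fdct output
        stored in block[] when computing the distortion below */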
3301     qmul= qscale*16;
3302     qadd= ((qscale-1)|1)*8;
3303
3304     if (s->mb_intra) {
3305         int q;
3306         if (!s->h263_aic) {
3307             if (n < 4)
3308                 q = s->y_dc_scale;
3309             else
3310                 q = s->c_dc_scale;
3311             q = q << 3;
3312         } else{
3313             /* For AIC we skip quant/dequant of INTRADC */
3314             q = 1 << 3;
3315             qadd=0;
3316         }
3317             
3318         /* note: block[0] is assumed to be positive */
3319         block[0] = (block[0] + (q >> 1)) / q;
3320         start_i = 1;
3321         last_non_zero = 0;
3322         qmat = s->q_intra_matrix[qscale];
3323         if(s->mpeg_quant || s->codec_id== CODEC_ID_MPEG1VIDEO)
3324             bias= 1<<(QMAT_SHIFT-1);
3325         length     = s->intra_ac_vlc_length;
3326         last_length= s->intra_ac_vlc_last_length;
3327     } else {
3328         start_i = 0;
3329         last_non_zero = -1;
3330         qmat = s->q_inter_matrix[qscale];
3331         length     = s->inter_ac_vlc_length;
3332         last_length= s->inter_ac_vlc_last_length;
3333     }
3334
3335     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3336     threshold2= (threshold1<<1);
3337
3338     for(i=start_i; i<64; i++) {
3339         const int j = scantable[i];
3340         const int k= i-start_i;
3341         int level = block[j];
3342         level = level * qmat[j];
3343
3344 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3345 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
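 //        the unsigned compare below is a branchless check for
 //        |level| > threshold1, i.e. bias+|level| >= 1<<QMAT_SHIFT, which is
 //        exactly when the coefficient quantizes to a nonzero value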
3346         if(((unsigned)(level+threshold1))>threshold2){
3347             if(level>0){
3348                 level= (bias + level)>>QMAT_SHIFT;
3349                 coeff[0][k]= level;
3350                 coeff[1][k]= level-1;
3351 //                coeff[2][k]= level-2;
3352             }else{
3353                 level= (bias - level)>>QMAT_SHIFT;
3354                 coeff[0][k]= -level;
3355                 coeff[1][k]= -level+1;
3356 //                coeff[2][k]= -level+2;
3357             }
3358             coeff_count[k]= FFMIN(level, 2);
3359             max |=level;
3360             last_non_zero = i;
3361         }else{
3362             coeff[0][k]= (level>>31)|1;
3363             coeff_count[k]= 1;
3364         }
3365     }
3366     
3367     *overflow= s->max_qcoeff < max; //overflow might have happened
3368     
3369     if(last_non_zero < start_i){
3370         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3371         return last_non_zero;
3372     }
3373
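     /* Lagrange multiplier of the rate-distortion cost: one bit of rate is
        weighted as roughly qscale*qscale*64*0.82 units of squared DCT-domain
        error; the 82/100 factor is an empirically chosen constant */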
3374     lambda= (qscale*qscale*64*82 + 50)/100; //FIXME finetune
3375         
3376     score_tab[0]= 0;
3377     for(i=0; i<=last_non_zero - start_i; i++){
3378         int level_index, run, j;
3379         const int dct_coeff= block[ scantable[i + start_i] ];
3380         const int zero_distoration= dct_coeff*dct_coeff;
3381         int best_score=256*256*256*120;
3382
3383         last_score += zero_distoration;
3384         for(level_index=0; level_index < coeff_count[i]; level_index++){
3385             int distoration;
3386             int level= coeff[level_index][i];
3387             int unquant_coeff;
3388             
3389             assert(level);
3390
3391             if(s->out_format == FMT_H263){
3392                 if(level>0){
3393                     unquant_coeff= level*qmul + qadd;
3394                 }else{
3395                     unquant_coeff= level*qmul - qadd;
3396                 }
3397             }else{ //MPEG1
3398                 j= s->idct_permutation[ scantable[i + start_i] ]; //FIXME optimize
3399                 if(s->mb_intra){
3400                     if (level < 0) {
3401                         unquant_coeff = (int)((-level) * qscale * s->intra_matrix[j]) >> 3;
3402                         unquant_coeff = -((unquant_coeff - 1) | 1);
3403                     } else {
3404                         unquant_coeff = (int)(  level  * qscale * s->intra_matrix[j]) >> 3;
3405                         unquant_coeff =   (unquant_coeff - 1) | 1;
3406                     }
3407                 }else{
3408                     if (level < 0) {
3409                         unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3410                         unquant_coeff = -((unquant_coeff - 1) | 1);
3411                     } else {
3412                         unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3413                         unquant_coeff =   (unquant_coeff - 1) | 1;
3414                     }
3415                 }
3416                 unquant_coeff<<= 3;
3417             }
3418
3419             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff);
3420             level+=64;
3421             if((level&(~127)) == 0){
3422                 for(run=0; run<=i - left_limit; run++){
3423                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3424                     score += score_tab[i-run];
3425                     
3426                     if(score < best_score){
3427                         best_score= 
3428                         score_tab[i+1]= score;
3429                         run_tab[i+1]= run;
3430                         level_tab[i+1]= level-64;
3431                     }
3432                 }
3433
3434                 if(s->out_format == FMT_H263){
3435                     for(run=0; run<=i - left_limit; run++){
3436                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3437                         score += score_tab[i-run];
3438                         if(score < last_score){
3439                             last_score= score;
3440                             last_run= run;
3441                             last_level= level-64;
3442                             last_i= i+1;
3443                         }
3444                     }
3445                 }
3446             }else{
3447                 distoration += esc_length*lambda;
3448                 for(run=0; run<=i - left_limit; run++){
3449                     int score= distoration + score_tab[i-run];
3450                     
3451                     if(score < best_score){
3452                         best_score= 
3453                         score_tab[i+1]= score;
3454                         run_tab[i+1]= run;
3455                         level_tab[i+1]= level-64;
3456                     }
3457                 }
3458
3459                 if(s->out_format == FMT_H263){
3460                     for(run=0; run<=i - left_limit; run++){
3461                         int score= distoration + score_tab[i-run];
3462                         if(score < last_score){
3463                             last_score= score;
3464                             last_run= run;
3465                             last_level= level-64;
3466                             last_i= i+1;
3467                         }
3468                     }
3469                 }
3470             }
3471         }
3472
3473         for(j=left_limit; j<=i; j++){
3474             score_tab[j] += zero_distoration;
3475         }
3476         score_limit+= zero_distoration;
3477         if(score_tab[i+1] < score_limit)
3478             score_limit= score_tab[i+1];
3479         
3480         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
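         // hence a start position is only pruned once its accumulated cost
         // exceeds the current best by more than lambda (i.e. by more than one bit)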
3481         while(score_tab[ left_limit ] > score_limit + lambda) left_limit++;
3482     }
3483
3484         //FIXME add some cbp penalty
3485
3486     if(s->out_format != FMT_H263){
3487         last_score= 256*256*256*120;
3488         for(i= left_limit; i<=last_non_zero - start_i + 1; i++){
3489             int score= score_tab[i];
3490             if(i) score += lambda*2; //FIXME be more exact?
3491
3492             if(score < last_score){
3493                 last_score= score;
3494                 last_i= i;
3495                 last_level= level_tab[i];
3496                 last_run= run_tab[i];
3497             }
3498         }
3499     }
3500     
3501     last_non_zero= last_i - 1 + start_i;
3502     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3503     
3504     if(last_non_zero < start_i)
3505         return last_non_zero;
3506     
3507     i= last_i;
3508     assert(last_level);
3509 //FIXME use permutated scantable
3510     block[ s->idct_permutation[ scantable[last_non_zero] ] ]= last_level;
3511     i -= last_run + 1;
3512     
3513     for(;i>0 ; i -= run_tab[i] + 1){
3514         const int j= s->idct_permutation[ scantable[i - 1 + start_i] ];
3515     
3516         block[j]= level_tab[i];
3517         assert(block[j]);
3518     }
3519
3520     return last_non_zero;
3521 }
3522
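 /* Plain (non-trellis) quantization: each coefficient is multiplied by the
    precomputed qmat entry (which already folds in qscale and the quant matrix),
    a rounding bias is added and the result is shifted down by QMAT_SHIFT;
    coefficients that stay below the threshold are zeroed. */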
3523 static int dct_quantize_c(MpegEncContext *s, 
3524                         DCTELEM *block, int n,
3525                         int qscale, int *overflow)
3526 {
3527     int i, j, level, last_non_zero, q;
3528     const int *qmat;
3529     const UINT8 *scantable= s->intra_scantable.scantable;
3530     int bias;
3531     int max=0;
3532     unsigned int threshold1, threshold2;
3533
3534     s->fdct (block);
3535
3536     if (s->mb_intra) {
3537         if (!s->h263_aic) {
3538             if (n < 4)
3539                 q = s->y_dc_scale;
3540             else
3541                 q = s->c_dc_scale;
3542             q = q << 3;
3543         } else
3544             /* For AIC we skip quant/dequant of INTRADC */
3545             q = 1 << 3;
3546             
3547         /* note: block[0] is assumed to be positive */
3548         block[0] = (block[0] + (q >> 1)) / q;
3549         i = 1;
3550         last_non_zero = 0;
3551         qmat = s->q_intra_matrix[qscale];
3552         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3553     } else {
3554         i = 0;
3555         last_non_zero = -1;
3556         qmat = s->q_inter_matrix[qscale];
3557         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3558     }
3559     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3560     threshold2= (threshold1<<1);
3561
3562     for(;i<64;i++) {
3563         j = scantable[i];
3564         level = block[j];
3565         level = level * qmat[j];
3566
3567 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3568 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3569         if(((unsigned)(level+threshold1))>threshold2){
3570             if(level>0){
3571                 level= (bias + level)>>QMAT_SHIFT;
3572                 block[j]= level;
3573             }else{
3574                 level= (bias - level)>>QMAT_SHIFT;
3575                 block[j]= -level;
3576             }
3577             max |=level;
3578             last_non_zero = i;
3579         }else{
3580             block[j]=0;
3581         }
3582     }
3583     *overflow= s->max_qcoeff < max; //overflow might have happened
3584     
3585     /* we need this permutation so that we correct the IDCT; only the nonzero elements are permuted */
3586     if (s->idct_permutation_type != FF_NO_IDCT_PERM)
3587         ff_block_permute(block, s->idct_permutation, scantable, last_non_zero);
3588
3589     return last_non_zero;
3590 }
3591
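 /* MPEG-1 inverse quantization: reconstructed levels are forced to be odd via
    (level - 1) | 1, the "oddification" used to limit IDCT mismatch between
    encoder and decoder. */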
3592 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
3593                                    DCTELEM *block, int n, int qscale)
3594 {
3595     int i, level, nCoeffs;
3596     const UINT16 *quant_matrix;
3597
3598     nCoeffs= s->block_last_index[n];
3599     
3600     if (s->mb_intra) {
3601         if (n < 4) 
3602             block[0] = block[0] * s->y_dc_scale;
3603         else
3604             block[0] = block[0] * s->c_dc_scale;
3605         /* XXX: only mpeg1 */
3606         quant_matrix = s->intra_matrix;
3607         for(i=1;i<=nCoeffs;i++) {
3608             int j= s->intra_scantable.permutated[i];
3609             level = block[j];
3610             if (level) {
3611                 if (level < 0) {
3612                     level = -level;
3613                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
3614                     level = (level - 1) | 1;
3615                     level = -level;
3616                 } else {
3617                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
3618                     level = (level - 1) | 1;
3619                 }
3620 #ifdef PARANOID
3621                 if (level < -2048 || level > 2047)
3622                     fprintf(stderr, "unquant error %d %d\n", i, level);
3623 #endif
3624                 block[j] = level;
3625             }
3626         }
3627     } else {
3628         i = 0;
3629         quant_matrix = s->inter_matrix;
3630         for(;i<=nCoeffs;i++) {
3631             int j= s->intra_scantable.permutated[i];
3632             level = block[j];
3633             if (level) {
3634                 if (level < 0) {
3635                     level = -level;
3636                     level = (((level << 1) + 1) * qscale *
3637                              ((int) (quant_matrix[j]))) >> 4;
3638                     level = (level - 1) | 1;
3639                     level = -level;
3640                 } else {
3641                     level = (((level << 1) + 1) * qscale *
3642                              ((int) (quant_matrix[j]))) >> 4;
3643                     level = (level - 1) | 1;
3644                 }
3645 #ifdef PARANOID
3646                 if (level < -2048 || level > 2047)
3647                     fprintf(stderr, "unquant error %d %d\n", i, level);
3648 #endif
3649                 block[j] = level;
3650             }
3651         }
3652     }
3653 }
3654
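 /* MPEG-2 inverse quantization: no oddification; instead, mismatch control
    toggles the LSB of the last coefficient (see the end of the inter path
    below). */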
3655 static void dct_unquantize_mpeg2_c(MpegEncContext *s, 
3656                                    DCTELEM *block, int n, int qscale)
3657 {
3658     int i, level, nCoeffs;
3659     const UINT16 *quant_matrix;
3660
3661     if(s->alternate_scan) nCoeffs= 63;
3662     else nCoeffs= s->block_last_index[n];
3663     
3664     if (s->mb_intra) {
3665         if (n < 4) 
3666             block[0] = block[0] * s->y_dc_scale;
3667         else
3668             block[0] = block[0] * s->c_dc_scale;
3669         quant_matrix = s->intra_matrix;
3670         for(i=1;i<=nCoeffs;i++) {
3671             int j= s->intra_scantable.permutated[i];
3672             level = block[j];
3673             if (level) {
3674                 if (level < 0) {
3675                     level = -level;
3676                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
3677                     level = -level;
3678                 } else {
3679                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
3680                 }
3681 #ifdef PARANOID
3682                 if (level < -2048 || level > 2047)
3683                     fprintf(stderr, "unquant error %d %d\n", i, level);
3684 #endif
3685                 block[j] = level;
3686             }
3687         }
3688     } else {
3689         int sum=-1;
3690         i = 0;
3691         quant_matrix = s->inter_matrix;
3692         for(;i<=nCoeffs;i++) {
3693             int j= s->intra_scantable.permutated[i];
3694             level = block[j];
3695             if (level) {
3696                 if (level < 0) {
3697                     level = -level;
3698                     level = (((level << 1) + 1) * qscale *
3699                              ((int) (quant_matrix[j]))) >> 4;
3700                     level = -level;
3701                 } else {
3702                     level = (((level << 1) + 1) * qscale *
3703                              ((int) (quant_matrix[j]))) >> 4;
3704                 }
3705 #ifdef PARANOID
3706                 if (level < -2048 || level > 2047)
3707                     fprintf(stderr, "unquant error %d %d\n", i, level);
3708 #endif
3709                 block[j] = level;
3710                 sum+=level;
3711             }
3712         }
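         /* MPEG-2 mismatch control: the sum of all dequantized coefficients must
            be odd; sum starts at -1, so sum&1 is 1 exactly when the total is even
            and the LSB of the last coefficient then gets toggled */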
3713         block[63]^=sum&1;
3714     }
3715 }
3716
3717
3718 static void dct_unquantize_h263_c(MpegEncContext *s, 
3719                                   DCTELEM *block, int n, int qscale)
3720 {
3721     int i, level, qmul, qadd;
3722     int nCoeffs;
3723     
3724     assert(s->block_last_index[n]>=0);
3725     
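     /* H.263 reconstruction: |level'| = 2*qscale*|level| + qadd, where qadd is
        qscale for odd qscale and qscale-1 for even qscale (always odd) */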
3726     qadd = (qscale - 1) | 1;
3727     qmul = qscale << 1;
3728     
3729     if (s->mb_intra) {
3730         if (!s->h263_aic) {
3731             if (n < 4) 
3732                 block[0] = block[0] * s->y_dc_scale;
3733             else
3734                 block[0] = block[0] * s->c_dc_scale;
3735         }else
3736             qadd = 0;
3737         i = 1;
3738         nCoeffs= 63; //does not always use the zigzag table
3739     } else {
3740         i = 0;
3741         nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
3742     }
3743
3744     for(;i<=nCoeffs;i++) {
3745         level = block[i];
3746         if (level) {
3747             if (level < 0) {
3748                 level = level * qmul - qadd;
3749             } else {
3750                 level = level * qmul + qadd;
3751             }
3752 #ifdef PARANOID
3753                 if (level < -2048 || level > 2047)
3754                     fprintf(stderr, "unquant error %d %d\n", i, level);
3755 #endif
3756             block[i] = level;
3757         }
3758     }
3759 }
3760
3761 char ff_get_pict_type_char(int pict_type){
3762     switch(pict_type){
3763     case I_TYPE: return 'I'; 
3764     case P_TYPE: return 'P'; 
3765     case B_TYPE: return 'B'; 
3766     case S_TYPE: return 'S'; 
3767     default:     return '?';
3768     }
3769 }
3770
3771 AVCodec mpeg1video_encoder = {
3772     "mpeg1video",
3773     CODEC_TYPE_VIDEO,
3774     CODEC_ID_MPEG1VIDEO,
3775     sizeof(MpegEncContext),
3776     MPV_encode_init,
3777     MPV_encode_picture,
3778     MPV_encode_end,
3779 };
3780
3781 AVCodec h263_encoder = {
3782     "h263",
3783     CODEC_TYPE_VIDEO,
3784     CODEC_ID_H263,
3785     sizeof(MpegEncContext),
3786     MPV_encode_init,
3787     MPV_encode_picture,
3788     MPV_encode_end,
3789 };
3790
3791 AVCodec h263p_encoder = {
3792     "h263p",
3793     CODEC_TYPE_VIDEO,
3794     CODEC_ID_H263P,
3795     sizeof(MpegEncContext),
3796     MPV_encode_init,
3797     MPV_encode_picture,
3798     MPV_encode_end,
3799 };
3800
3801 AVCodec rv10_encoder = {
3802     "rv10",
3803     CODEC_TYPE_VIDEO,
3804     CODEC_ID_RV10,
3805     sizeof(MpegEncContext),
3806     MPV_encode_init,
3807     MPV_encode_picture,
3808     MPV_encode_end,
3809 };
3810
3811 AVCodec mjpeg_encoder = {
3812     "mjpeg",
3813     CODEC_TYPE_VIDEO,
3814     CODEC_ID_MJPEG,
3815     sizeof(MpegEncContext),
3816     MPV_encode_init,
3817     MPV_encode_picture,
3818     MPV_encode_end,
3819 };
3820
3821 AVCodec mpeg4_encoder = {
3822     "mpeg4",
3823     CODEC_TYPE_VIDEO,
3824     CODEC_ID_MPEG4,
3825     sizeof(MpegEncContext),
3826     MPV_encode_init,
3827     MPV_encode_picture,
3828     MPV_encode_end,
3829 };
3830
3831 AVCodec msmpeg4v1_encoder = {
3832     "msmpeg4v1",
3833     CODEC_TYPE_VIDEO,
3834     CODEC_ID_MSMPEG4V1,
3835     sizeof(MpegEncContext),
3836     MPV_encode_init,
3837     MPV_encode_picture,
3838     MPV_encode_end,
3839 };
3840
3841 AVCodec msmpeg4v2_encoder = {
3842     "msmpeg4v2",
3843     CODEC_TYPE_VIDEO,
3844     CODEC_ID_MSMPEG4V2,
3845     sizeof(MpegEncContext),
3846     MPV_encode_init,
3847     MPV_encode_picture,
3848     MPV_encode_end,
3849 };
3850
3851 AVCodec msmpeg4v3_encoder = {
3852     "msmpeg4",
3853     CODEC_TYPE_VIDEO,
3854     CODEC_ID_MSMPEG4V3,
3855     sizeof(MpegEncContext),
3856     MPV_encode_init,
3857     MPV_encode_picture,
3858     MPV_encode_end,
3859 };
3860
3861 AVCodec wmv1_encoder = {
3862     "wmv1",
3863     CODEC_TYPE_VIDEO,
3864     CODEC_ID_WMV1,
3865     sizeof(MpegEncContext),
3866     MPV_encode_init,
3867     MPV_encode_picture,
3868     MPV_encode_end,
3869 };
3870