/* libavcodec/mpegvideo.c — gitweb export header removed */
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "mpegvideo_common.h"
34 #include "mjpegenc.h"
35 #include "msmpeg4.h"
36 #include "faandct.h"
37 #include <limits.h>
38
39 //#undef NDEBUG
40 //#include <assert.h>
41
/* Forward declarations of the C reference inverse-quantizers; these are
 * installed as function pointers by ff_dct_common_init() so that
 * architecture-specific code can override them. */
static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);

#ifdef HAVE_XVMC
/* XvMC hardware-acceleration hooks, implemented in a separate file. */
extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
extern void XVMC_field_end(MpegEncContext *s);
extern void XVMC_decode_mb(MpegEncContext *s);
#endif

/* Edge-drawing entry point; defaults to the C version and may be replaced
 * by an architecture-optimized implementation at init time. */
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
65
66
67 /* enable all paranoid tests for rounding, overflows, etc... */
68 //#define PARANOID
69
70 //#define DEBUG
71
72
/* Identity luma->chroma qscale mapping, used by codecs that do not define
 * a separate chroma quantizer table (chroma qscale == luma qscale). */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
77
78 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
79     int i;
80     int end;
81
82     st->scantable= src_scantable;
83
84     for(i=0; i<64; i++){
85         int j;
86         j = src_scantable[i];
87         st->permutated[i] = permutation[j];
88 #ifdef ARCH_POWERPC
89         st->inverse[j] = i;
90 #endif
91     }
92
93     end=-1;
94     for(i=0; i<64; i++){
95         int j;
96         j = st->permutated[i];
97         if(j>end) end=j;
98         st->raster_end[i]= end;
99     }
100 }
101
/**
 * Searches [p, end) for an MPEG-style 00 00 01 start code.
 * *state carries the last 4 bytes read (big-endian) across calls, so a
 * start code split between two buffers is still detected.
 * @return pointer just past the start code, or end if none was found
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* feed up to 3 bytes through the carried-over state so a code that
       straddles the previous buffer boundary is found */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* main scan: look at the last 3 bytes read and skip as far ahead as
       they allow while still being able to rule out 00 00 01 */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3; // p[-1] can be neither the 00s nor the 01
        else if(p[-2]          ) p+= 2; // p[-2] nonzero: can't be the leading 00 00
        else if(p[-3]|(p[-1]-1)) p++;   // not exactly 00 00 01 yet
        else{
            p++; // found 00 00 01; p now points just past it
            break;
        }
    }

    /* stash the last 4 bytes (clamped to the buffer) for the next call */
    p= FFMIN(p, end)-4;
    *state= AV_RB32(p);

    return p+4;
}
131
/* init common dct for both encoder and decoder */
int ff_dct_common_init(MpegEncContext *s)
{
    /* install the C reference unquantizers; the arch-specific init below
       may replace them with optimized versions */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

    /* at most one architecture-specific init is compiled in */
#if defined(HAVE_MMX)
    MPV_common_init_mmx(s);
#elif defined(ARCH_ALPHA)
    MPV_common_init_axp(s);
#elif defined(HAVE_MLIB)
    MPV_common_init_mlib(s);
#elif defined(HAVE_MMI)
    MPV_common_init_mmi(s);
#elif defined(ARCH_ARMV4L)
    MPV_common_init_armv4l(s);
#elif defined(HAVE_ALTIVEC)
    MPV_common_init_altivec(s);
#elif defined(ARCH_BFIN)
    MPV_common_init_bfin(s);
#endif

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
175
176 void copy_picture(Picture *dst, Picture *src){
177     *dst = *src;
178     dst->type= FF_BUFFER_TYPE_COPY;
179 }
180
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0
 * Side-data tables (qscale, mb_type, motion vectors, ...) are allocated
 * lazily the first time this Picture is used.
 * @return 0 on success, -1 on failure (partially allocated tables are
 *         released through the CHECKED_ALLOCZ fail path)
 */
int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) does not sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;
    int r= -1;

    if(shared){
        /* caller already owns the pixel buffers; just tag the picture */
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        /* sanity-check what the (possibly user-supplied) get_buffer returned */
        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* strides must stay constant for the whole stream */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* first use of this Picture: allocate the per-MB side-data tables */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , (big_mb_num + s->mb_stride) * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + 2*s->mb_stride+1; // skip the guard row/column
        if(s->out_format == FMT_H264){
            /* H.264 uses 4x4 motion granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* 8x8 motion granularity for the other codecs / debug display */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    /* It might be nicer if the application would keep track of these
     * but it would require an API change. */
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    if(r>=0)
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    return -1;
}
269
270 /**
271  * deallocates a picture
272  */
273 static void free_picture(MpegEncContext *s, Picture *pic){
274     int i;
275
276     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
277         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
278     }
279
280     av_freep(&pic->mb_var);
281     av_freep(&pic->mc_mb_var);
282     av_freep(&pic->mb_mean);
283     av_freep(&pic->mbskip_table);
284     av_freep(&pic->qscale_table);
285     av_freep(&pic->mb_type_base);
286     av_freep(&pic->dct_coeff);
287     av_freep(&pic->pan_scan);
288     pic->mb_type= NULL;
289     for(i=0; i<2; i++){
290         av_freep(&pic->motion_val_base[i]);
291         av_freep(&pic->ref_index[i]);
292     }
293
294     if(pic->type == FF_BUFFER_TYPE_SHARED){
295         for(i=0; i<4; i++){
296             pic->base[i]=
297             pic->data[i]= NULL;
298         }
299         pic->type= 0;
300     }
301 }
302
/**
 * Allocates the per-thread scratch buffers of a (possibly duplicated)
 * MpegEncContext. Counterpart of free_duplicate_context().
 * @param base currently unused; kept for symmetry with the update functions
 * @return 0 on success, -1 on allocation failure (cleanup is deferred to
 *         MPV_common_end())
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;

     //FIXME should be linesize instead of s->width*2 but that is not known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the rd/b/obmc scratchpads alias the motion-estimation scratchpad */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    /* pblocks[] gives per-block access into the block array */
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
332
333 static void free_duplicate_context(MpegEncContext *s){
334     if(s==NULL) return;
335
336     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
337     av_freep(&s->me.scratchpad);
338     s->rd_scratchpad=
339     s->b_scratchpad=
340     s->obmc_scratchpad= NULL;
341
342     av_freep(&s->dct_error_sum);
343     av_freep(&s->me.map);
344     av_freep(&s->me.score_map);
345     av_freep(&s->blocks);
346     s->block= NULL;
347 }
348
349 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
350 #define COPY(a) bak->a= src->a
351     COPY(allocated_edge_emu_buffer);
352     COPY(edge_emu_buffer);
353     COPY(me.scratchpad);
354     COPY(rd_scratchpad);
355     COPY(b_scratchpad);
356     COPY(obmc_scratchpad);
357     COPY(me.map);
358     COPY(me.score_map);
359     COPY(blocks);
360     COPY(block);
361     COPY(start_mb_y);
362     COPY(end_mb_y);
363     COPY(me.map_generation);
364     COPY(pb);
365     COPY(dct_error_sum);
366     COPY(dct_count[0]);
367     COPY(dct_count[1]);
368 #undef COPY
369 }
370
/**
 * Makes dst a copy of src while keeping dst's own thread-local allocations:
 * dst's scratch buffers etc. are saved, the whole context is memcpy'd over,
 * and the saved fields are restored on top.
 */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);           // save dst's own pointers
    memcpy(dst, src, sizeof(MpegEncContext));      // bulk copy everything
    backup_duplicate_context(dst, &bak);           // restore dst's pointers
    for(i=0;i<12;i++){
        dst->pblocks[i] = (short *)(&dst->block[i]); // re-point into dst's blocks
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
384
385 /**
386  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
387  * the changed fields will not depend upon the prior state of the MpegEncContext.
388  */
389 void MPV_common_defaults(MpegEncContext *s){
390     s->y_dc_scale_table=
391     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
392     s->chroma_qscale_table= ff_default_chroma_qscale_table;
393     s->progressive_frame= 1;
394     s->progressive_sequence= 1;
395     s->picture_structure= PICT_FRAME;
396
397     s->coded_picture_number = 0;
398     s->picture_number = 0;
399     s->input_picture_number = 0;
400
401     s->picture_in_gop_number = 0;
402
403     s->f_code = 1;
404     s->b_code = 1;
405 }
406
/**
 * sets the given MpegEncContext to defaults for decoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 * Currently identical to the common defaults; kept separate so decoder-only
 * defaults can be added without touching the encoder path.
 */
void MPV_decode_defaults(MpegEncContext *s){
    MPV_common_defaults(s);
}
414
/**
 * init common structure for both encoder and decoder.
 * this assumes that some variables like width/height are already set
 * On failure everything allocated so far is torn down via MPV_common_end().
 * @return 0 on success, -1 on error
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y, threads;

    s->mb_height = (s->height + 15) / 16;

    /* slice threading cannot use more threads than macroblock rows */
    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    ff_dct_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* derive all MB/block grid dimensions; the +1 in the strides is a
       guard column used by prediction and error resilience */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    /* per-plane row strides for the block grids: luma uses b8, chroma mb */
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    /* mapping from linear MB index to x + y*mb_stride position */
    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables; the +mb_stride+1 offsets skip the guard row/col */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding

        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))

        /* per-qscale quantization matrices (32 qscales x 64 coefficients) */
        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))

        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024; // neutral DC prediction value
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);

    s->parse_context.state= -1;
    /* extra planes for drawing motion vectors / QP / MB types on top */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    s->thread_context[0]= s;
    /* h264 does thread context setup itself, but it needs context[0]
     * to be fully initialized for the error resilience code */
    threads = s->codec_id == CODEC_ID_H264 ? 1 : s->avctx->thread_count;

    for(i=1; i<threads; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    /* give each slice thread its own scratch buffers and MB-row range */
    for(i=0; i<threads; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
611
/* free everything allocated by MPV_common_init() (and more); safe to call
 * on a partially initialized context since av_freep() tolerates NULL */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* thread_context[0] is s itself, so free its scratch buffers but
       only free the context structs for i>=1 */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* encoder MV tables; clear the derived (offset) pointers too */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* interlaced field MV / select tables */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    /* prediction state (DC/AC values, coded-block pattern) */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* release all pictures and their side data before freeing the array */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
695
/**
 * Builds the run/level lookup tables (max_level, max_run, index_run) of an
 * RLTable, separately for the "not last" and "last" coefficient halves.
 * If static_store is given the tables live there (layout: max_level at 0,
 * max_run at MAX_RUN+1, index_run at MAX_RUN+MAX_LEVEL+2), otherwise they
 * are heap-allocated.
 */
void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
{
    int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
    uint8_t index_run[MAX_RUN+1];
    int last, run, level, start, end, i;

    /* If table is static, we can quit if rl->max_level[0] is not NULL */
    if(static_store && rl->max_level[0])
        return;

    /* compute max_level[], max_run[] and index_run[] */
    for(last=0;last<2;last++) {
        /* first pass covers the "not last" codes, second the "last" codes */
        if (last == 0) {
            start = 0;
            end = rl->last;
        } else {
            start = rl->last;
            end = rl->n;
        }

        /* rl->n marks an unused index_run slot */
        memset(max_level, 0, MAX_RUN + 1);
        memset(max_run, 0, MAX_LEVEL + 1);
        memset(index_run, rl->n, MAX_RUN + 1);
        for(i=start;i<end;i++) {
            run = rl->table_run[i];
            level = rl->table_level[i];
            if (index_run[run] == rl->n)   // remember first code with this run
                index_run[run] = i;
            if (level > max_level[run])
                max_level[run] = level;
            if (run > max_run[level])
                max_run[level] = run;
        }
        /* publish the tables, either into the static store or fresh heap */
        if(static_store)
            rl->max_level[last] = static_store[last];
        else
            rl->max_level[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
        if(static_store)
            rl->max_run[last] = static_store[last] + MAX_RUN + 1;
        else
            rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
        memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
        if(static_store)
            rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
        else
            rl->index_run[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
    }
}
746
/**
 * Builds the per-qscale RL_VLC_ELEM tables of an RLTable: for each of the
 * 32 qscales the (run, level) of every VLC table entry is precomputed with
 * the inverse quantization (level*qmul + qadd) already applied.
 */
void init_vlc_rl(RLTable *rl, int use_static)
{
    int i, q;

    /* Return if static table is already initialized */
    if(use_static && rl->rl_vlc[0])
        return;

    init_vlc(&rl->vlc, 9, rl->n + 1,
             &rl->table_vlc[0][1], 4, 2,
             &rl->table_vlc[0][0], 4, 2, use_static);


    for(q=0; q<32; q++){
        int qmul= q*2;
        int qadd= (q-1)|1;

        if(q==0){ // qscale 0 means no quantization at all
            qmul=1;
            qadd=0;
        }
        if(use_static)
            rl->rl_vlc[q]= av_mallocz_static(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
        else
            rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
        for(i=0; i<rl->vlc.table_size; i++){
            int code= rl->vlc.table[i][0];
            int len = rl->vlc.table[i][1];
            int level, run;

            if(len==0){ // illegal code
                run= 66;
                level= MAX_LEVEL;
            }else if(len<0){ //more bits needed
                run= 0;
                level= code;
            }else{
                if(code==rl->n){ //esc
                    run= 66;
                    level= 0;
                }else{
                    run=   rl->table_run  [code] + 1;
                    level= rl->table_level[code] * qmul + qadd;
                    if(code >= rl->last) run+=192; // flag "last" codes via run offset
                }
            }
            rl->rl_vlc[q][i].len= len;
            rl->rl_vlc[q][i].level= level;
            rl->rl_vlc[q][i].run= run;
        }
    }
}
799
/* draw the edges of width 'w' of an image of size width, height
 * by replicating the border pixels outward (left/right columns, then the
 * already-extended top/bottom rows, which fills the corners for free) */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *row, *last_line;
    int k;

    /* left and right: replicate the first/last pixel of every row */
    row = buf;
    for (k = 0; k < height; k++) {
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* top and bottom (including corners): copy the already widened first
     * and last rows outward */
    last_line = buf + (height - 1) * wrap;
    for (k = 1; k <= w; k++) {
        memcpy(buf - k * wrap - w, buf - w, width + 2 * w);           /* top */
        memcpy(last_line + k * wrap - w, last_line - w, width + 2 * w); /* bottom */
    }
}
828
829 int ff_find_unused_picture(MpegEncContext *s, int shared){
830     int i;
831
832     if(shared){
833         for(i=0; i<MAX_PICTURE_COUNT; i++){
834             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
835         }
836     }else{
837         for(i=0; i<MAX_PICTURE_COUNT; i++){
838             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
839         }
840         for(i=0; i<MAX_PICTURE_COUNT; i++){
841             if(s->picture[i].data[0]==NULL) return i;
842         }
843     }
844
845     av_log(s->avctx, AV_LOG_FATAL, "Internal error, picture buffer overflow\n");
846     /* We could return -1, but the codec would crash trying to draw into a
847      * non-existing frame anyway. This is safer than waiting for a random crash.
848      * Also the return of this is never useful, an encoder must only allocate
849      * as much as allowed in the specification. This has no relationship to how
850      * much libavcodec could allocate (and MAX_PICTURE_COUNT is always large
851      * enough for such valid streams).
852      * Plus, a decoder has to check stream validity and remove frames if too
853      * many reference frames are around. Waiting for "OOM" is not correct at
854      * all. Similarly, missing reference frames have to be replaced by
855      * interpolated/MC frames, anything else is a bug in the codec ...
856      */
857     abort();
858     return -1;
859 }
860
861 static void update_noise_reduction(MpegEncContext *s){
862     int intra, i;
863
864     for(intra=0; intra<2; intra++){
865         if(s->dct_count[intra] > (1<<16)){
866             for(i=0; i<64; i++){
867                 s->dct_error_sum[intra][i] >>=1;
868             }
869             s->dct_count[intra] >>= 1;
870         }
871
872         for(i=0; i<64; i++){
873             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
874         }
875     }
876 }
877
/**
 * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
 * @return 0 on success, -1 if allocating the current picture failed
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
      if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                /* a picture that still holds data and claims to be a reference
                   but is not the next picture should have been released already */
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                }
            }
        }
      }
    }
alloc:
    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        pic->reference= 0;
        if (!s->dropable){
            if (s->codec_id == CODEC_ID_H264)
                pic->reference = s->picture_structure;
            else if (s->pict_type != B_TYPE)
                pic->reference = 3;
        }

        pic->coded_picture_number= s->coded_picture_number++;

        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE)
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);

    /* rotate the reference pointers: the current non-B picture becomes the
       next reference, the previous "next" becomes "last" */
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/

    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);

    /* inter frame without a usable reference: retry the allocation path so a
       (gray/dummy) current picture still exists to decode into */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && !s->dropable){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    if(s->picture_structure!=PICT_FRAME && s->out_format != FMT_H264){
        int i;
        /* field picture: double the line stride so only the selected field is
           touched, and start on the second line for the bottom field */
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            }
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }

    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init is not called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }

#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
1009
/* generic function for encode/decode called after a frame has been coded/decoded */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    /* NOTE(review): emms_c() presumably resets FPU/MMX state after DSP code on
       x86 (no-op elsewhere) — confirm in dsputil */
    emms_c();

    /* remember per-type state for rate control / next frame decisions */
    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1060
1061 /**
1062  * draws an line from (ex, ey) -> (sx, sy).
1063  * @param w width of the image
1064  * @param h height of the image
1065  * @param stride stride/linesize of the image
1066  * @param color color of the arrow
1067  */
1068 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1069     int x, y, fr, f;
1070
1071     sx= av_clip(sx, 0, w-1);
1072     sy= av_clip(sy, 0, h-1);
1073     ex= av_clip(ex, 0, w-1);
1074     ey= av_clip(ey, 0, h-1);
1075
1076     buf[sy*stride + sx]+= color;
1077
1078     if(FFABS(ex - sx) > FFABS(ey - sy)){
1079         if(sx > ex){
1080             FFSWAP(int, sx, ex);
1081             FFSWAP(int, sy, ey);
1082         }
1083         buf+= sx + sy*stride;
1084         ex-= sx;
1085         f= ((ey-sy)<<16)/ex;
1086         for(x= 0; x <= ex; x++){
1087             y = (x*f)>>16;
1088             fr= (x*f)&0xFFFF;
1089             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1090             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1091         }
1092     }else{
1093         if(sy > ey){
1094             FFSWAP(int, sx, ex);
1095             FFSWAP(int, sy, ey);
1096         }
1097         buf+= sx + sy*stride;
1098         ey-= sy;
1099         if(ey) f= ((ex-sx)<<16)/ey;
1100         else   f= 0;
1101         for(y= 0; y <= ey; y++){
1102             x = (y*f)>>16;
1103             fr= (y*f)&0xFFFF;
1104             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1105             buf[y*stride + x+1]+= (color*         fr )>>16;;
1106         }
1107     }
1108 }
1109
1110 /**
1111  * draws an arrow from (ex, ey) -> (sx, sy).
1112  * @param w width of the image
1113  * @param h height of the image
1114  * @param stride stride/linesize of the image
1115  * @param color color of the arrow
1116  */
1117 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1118     int dx,dy;
1119
1120     sx= av_clip(sx, -100, w+100);
1121     sy= av_clip(sy, -100, h+100);
1122     ex= av_clip(ex, -100, w+100);
1123     ey= av_clip(ey, -100, h+100);
1124
1125     dx= ex - sx;
1126     dy= ey - sy;
1127
1128     if(dx*dx + dy*dy > 3*3){
1129         int rx=  dx + dy;
1130         int ry= -dx + dy;
1131         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1132
1133         //FIXME subpixel accuracy
1134         rx= ROUNDED_DIV(rx*3<<4, length);
1135         ry= ROUNDED_DIV(ry*3<<4, length);
1136
1137         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1138         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1139     }
1140     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1141 }
1142
/**
 * prints debugging info for the given picture.
 * Depending on s->avctx->debug / debug_mv flags this either dumps a textual
 * per-macroblock table (skip count, qscale, MB type) to the log, or paints
 * motion vectors / QP / MB-type overlays into a copy of the frame.
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    /* textual per-macroblock dump */
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9;
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* visual overlays: work on a private copy of the frame so the decode
       buffers themselves are not trashed */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                /* motion vector arrows: type 0 = P forward, 1 = B forward,
                   2 = B backward */
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    /* one arrow per partition, anchored at the partition center */
                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                /* QP visualization: paint both chroma planes with a gray level
                   proportional to the macroblock qscale */
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                /* MB-type visualization: color the chroma planes per type and
                   mark partition boundaries in luma */
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128;
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    u*= 0x0101010101010101ULL;
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
1403
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int y;
    int start_y, start_x, end_y, end_x, copy_w;

    /* if the block lies completely outside the picture, slide it back to the
       closest position that still overlaps the picture by one row/column */
    if(src_y >= h){
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    }else if(src_y <= -block_h){
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if(src_x >= w){
        src  += w - 1 - src_x;
        src_x = w - 1;
    }else if(src_x <= -block_w){
        src  += 1 - block_w - src_x;
        src_x = 1 - block_w;
    }

    /* region of the block that is backed by real source samples */
    start_y = -src_y > 0 ? -src_y : 0;
    start_x = -src_x > 0 ? -src_x : 0;
    end_y   = h - src_y < block_h ? h - src_y : block_h;
    end_x   = w - src_x < block_w ? w - src_x : block_w;
    copy_w  = end_x - start_x;

    /* copy the samples that exist */
    for(y = start_y; y < end_y; y++)
        memcpy(buf + y*linesize + start_x, src + y*linesize + start_x, copy_w);

    /* replicate the first valid row upwards */
    for(y = 0; y < start_y; y++)
        memcpy(buf + y*linesize + start_x, buf + start_y*linesize + start_x, copy_w);

    /* replicate the last valid row downwards */
    for(y = end_y; y < block_h; y++)
        memcpy(buf + y*linesize + start_x, buf + (end_y - 1)*linesize + start_x, copy_w);

    /* replicate the leftmost and rightmost valid columns sideways */
    for(y = 0; y < block_h; y++){
        memset(buf + y*linesize, buf[y*linesize + start_x], start_x);
        memset(buf + y*linesize + end_x, buf[y*linesize + end_x - 1], block_w - end_x);
    }
}
1474
/**
 * Half-pel motion compensation for a single plane at reduced (lowres)
 * resolution.
 * @return 1 if the edge emulation buffer had to be used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int s_mask= (2<<lowres)-1; // mask selecting the sub-pel fraction bits at this lowres level
    int emu=0;
    int sx, sy;

    if(s->quarter_sample){
        // quarter-pel vectors: drop the extra precision bit (no qpel in lowres)
        motion_x/=2;
        motion_y/=2;
    }

    // split the motion vector into sub-pel fraction (sx, sy) and integer part
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    // reference block reaches outside the picture: build a padded copy first
    // (the unsigned compares also catch negative src_x/src_y)
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    // rescale the sub-pel fraction to the 1/8-pel units pix_op expects
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    if(field_select)
        src += s->linesize;
    pix_op[lowres](dest, src, stride, h, sx, sy);
    return emu;
}
1516
1517 /* apply one mpeg motion vector to the three components */
1518 static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
1519                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1520                                int field_based, int bottom_field, int field_select,
1521                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
1522                                int motion_x, int motion_y, int h)
1523 {
1524     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1525     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
1526     const int lowres= s->avctx->lowres;
1527     const int block_s= 8>>lowres;
1528     const int s_mask= (2<<lowres)-1;
1529     const int h_edge_pos = s->h_edge_pos >> lowres;
1530     const int v_edge_pos = s->v_edge_pos >> lowres;
1531     linesize   = s->current_picture.linesize[0] << field_based;
1532     uvlinesize = s->current_picture.linesize[1] << field_based;
1533
1534     if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
1535         motion_x/=2;
1536         motion_y/=2;
1537     }
1538
1539     if(field_based){
1540         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
1541     }
1542
1543     sx= motion_x & s_mask;
1544     sy= motion_y & s_mask;
1545     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
1546     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
1547
1548     if (s->out_format == FMT_H263) {
1549         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
1550         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
1551         uvsrc_x = src_x>>1;
1552         uvsrc_y = src_y>>1;
1553     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
1554         mx = motion_x / 4;
1555         my = motion_y / 4;
1556         uvsx = (2*mx) & s_mask;
1557         uvsy = (2*my) & s_mask;
1558         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
1559         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
1560     } else {
1561         mx = motion_x / 2;
1562         my = motion_y / 2;
1563         uvsx = mx & s_mask;
1564         uvsy = my & s_mask;
1565         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
1566         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
1567     }
1568
1569     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
1570     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
1571     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
1572
1573     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
1574        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
1575             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
1576                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
1577             ptr_y = s->edge_emu_buffer;
1578             if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1579                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
1580                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
1581                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
1582                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
1583                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
1584                 ptr_cb= uvbuf;
1585                 ptr_cr= uvbuf+16;
1586             }
1587     }
1588
1589     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
1590         dest_y += s->linesize;
1591         dest_cb+= s->uvlinesize;
1592         dest_cr+= s->uvlinesize;
1593     }
1594
1595     if(field_select){
1596         ptr_y += s->linesize;
1597         ptr_cb+= s->uvlinesize;
1598         ptr_cr+= s->uvlinesize;
1599     }
1600
1601     sx <<= 2 - lowres;
1602     sy <<= 2 - lowres;
1603     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
1604
1605     if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1606         uvsx <<= 2 - lowres;
1607         uvsy <<= 2 - lowres;
1608         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
1609         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
1610     }
1611     //FIXME h261 lowres loop filter
1612 }
1613
1614 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
1615                                      uint8_t *dest_cb, uint8_t *dest_cr,
1616                                      uint8_t **ref_picture,
1617                                      h264_chroma_mc_func *pix_op,
1618                                      int mx, int my){
1619     const int lowres= s->avctx->lowres;
1620     const int block_s= 8>>lowres;
1621     const int s_mask= (2<<lowres)-1;
1622     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
1623     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
1624     int emu=0, src_x, src_y, offset, sx, sy;
1625     uint8_t *ptr;
1626
1627     if(s->quarter_sample){
1628         mx/=2;
1629         my/=2;
1630     }
1631
1632     /* In case of 8X8, we construct a single chroma motion vector
1633        with a special rounding */
1634     mx= ff_h263_round_chroma(mx);
1635     my= ff_h263_round_chroma(my);
1636
1637     sx= mx & s_mask;
1638     sy= my & s_mask;
1639     src_x = s->mb_x*block_s + (mx >> (lowres+1));
1640     src_y = s->mb_y*block_s + (my >> (lowres+1));
1641
1642     offset = src_y * s->uvlinesize + src_x;
1643     ptr = ref_picture[1] + offset;
1644     if(s->flags&CODEC_FLAG_EMU_EDGE){
1645         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
1646            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
1647             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
1648             ptr= s->edge_emu_buffer;
1649             emu=1;
1650         }
1651     }
1652     sx <<= 2 - lowres;
1653     sy <<= 2 - lowres;
1654     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
1655
1656     ptr = ref_picture[2] + offset;
1657     if(emu){
1658         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
1659         ptr= s->edge_emu_buffer;
1660     }
1661     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
1662 }
1663
1664 /**
1665  * motion compensation of a single macroblock
1666  * @param s context
1667  * @param dest_y luma destination pointer
1668  * @param dest_cb chroma cb/u destination pointer
1669  * @param dest_cr chroma cr/v destination pointer
1670  * @param dir direction (0->forward, 1->backward)
1671  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
1672  * @param pic_op halfpel motion compensation function (average or put normally)
1673  * the motion vectors are taken from s->mv and the MV type from s->mv_type
1674  */
1675 static inline void MPV_motion_lowres(MpegEncContext *s,
1676                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1677                               int dir, uint8_t **ref_picture,
1678                               h264_chroma_mc_func *pix_op)
1679 {
1680     int mx, my;
1681     int mb_x, mb_y, i;
1682     const int lowres= s->avctx->lowres;
1683     const int block_s= 8>>lowres;
1684
1685     mb_x = s->mb_x;
1686     mb_y = s->mb_y;
1687
1688     switch(s->mv_type) {
1689     case MV_TYPE_16X16:
1690         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1691                     0, 0, 0,
1692                     ref_picture, pix_op,
1693                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
1694         break;
1695     case MV_TYPE_8X8:
1696         mx = 0;
1697         my = 0;
1698             for(i=0;i<4;i++) {
1699                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
1700                             ref_picture[0], 0, 0,
1701                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
1702                             s->width, s->height, s->linesize,
1703                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
1704                             block_s, block_s, pix_op,
1705                             s->mv[dir][i][0], s->mv[dir][i][1]);
1706
1707                 mx += s->mv[dir][i][0];
1708                 my += s->mv[dir][i][1];
1709             }
1710
1711         if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY))
1712             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
1713         break;
1714     case MV_TYPE_FIELD:
1715         if (s->picture_structure == PICT_FRAME) {
1716             /* top field */
1717             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1718                         1, 0, s->field_select[dir][0],
1719                         ref_picture, pix_op,
1720                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
1721             /* bottom field */
1722             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1723                         1, 1, s->field_select[dir][1],
1724                         ref_picture, pix_op,
1725                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
1726         } else {
1727             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
1728                 ref_picture= s->current_picture_ptr->data;
1729             }
1730
1731             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1732                         0, 0, s->field_select[dir][0],
1733                         ref_picture, pix_op,
1734                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
1735         }
1736         break;
1737     case MV_TYPE_16X8:
1738         for(i=0; i<2; i++){
1739             uint8_t ** ref2picture;
1740
1741             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
1742                 ref2picture= ref_picture;
1743             }else{
1744                 ref2picture= s->current_picture_ptr->data;
1745             }
1746
1747             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1748                         0, 0, s->field_select[dir][i],
1749                         ref2picture, pix_op,
1750                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
1751
1752             dest_y += 2*block_s*s->linesize;
1753             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
1754             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
1755         }
1756         break;
1757     case MV_TYPE_DMV:
1758         if(s->picture_structure == PICT_FRAME){
1759             for(i=0; i<2; i++){
1760                 int j;
1761                 for(j=0; j<2; j++){
1762                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1763                                 1, j, j^i,
1764                                 ref_picture, pix_op,
1765                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
1766                 }
1767                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
1768             }
1769         }else{
1770             for(i=0; i<2; i++){
1771                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1772                             0, 0, s->picture_structure != i+1,
1773                             ref_picture, pix_op,
1774                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
1775
1776                 // after put we make avg of the same block
1777                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
1778
1779                 //opposite parity is always in the same frame if this is second field
1780                 if(!s->first_field){
1781                     ref_picture = s->current_picture_ptr->data;
1782                 }
1783             }
1784         }
1785     break;
1786     default: assert(0);
1787     }
1788 }
1789
1790 /* put block[] to dest[] */
1791 static inline void put_dct(MpegEncContext *s,
1792                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1793 {
1794     s->dct_unquantize_intra(s, block, i, qscale);
1795     s->dsp.idct_put (dest, line_size, block);
1796 }
1797
1798 /* add block[] to dest[] */
1799 static inline void add_dct(MpegEncContext *s,
1800                            DCTELEM *block, int i, uint8_t *dest, int line_size)
1801 {
1802     if (s->block_last_index[i] >= 0) {
1803         s->dsp.idct_add (dest, line_size, block);
1804     }
1805 }
1806
1807 static inline void add_dequant_dct(MpegEncContext *s,
1808                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1809 {
1810     if (s->block_last_index[i] >= 0) {
1811         s->dct_unquantize_inter(s, block, i, qscale);
1812
1813         s->dsp.idct_add (dest, line_size, block);
1814     }
1815 }
1816
1817 /**
1818  * cleans dc, ac, coded_block for the current non intra MB
1819  */
1820 void ff_clean_intra_table_entries(MpegEncContext *s)
1821 {
1822     int wrap = s->b8_stride;
1823     int xy = s->block_index[0];
1824
1825     s->dc_val[0][xy           ] =
1826     s->dc_val[0][xy + 1       ] =
1827     s->dc_val[0][xy     + wrap] =
1828     s->dc_val[0][xy + 1 + wrap] = 1024;
1829     /* ac pred */
1830     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
1831     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
1832     if (s->msmpeg4_version>=3) {
1833         s->coded_block[xy           ] =
1834         s->coded_block[xy + 1       ] =
1835         s->coded_block[xy     + wrap] =
1836         s->coded_block[xy + 1 + wrap] = 0;
1837     }
1838     /* chroma */
1839     wrap = s->mb_stride;
1840     xy = s->mb_x + s->mb_y * wrap;
1841     s->dc_val[1][xy] =
1842     s->dc_val[2][xy] = 1024;
1843     /* ac pred */
1844     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
1845     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
1846
1847     s->mbintra_table[xy]= 0;
1848 }
1849
1850 /* generic function called after a macroblock has been parsed by the
1851    decoder or after it has been encoded by the encoder.
1852
1853    Important variables used:
1854    s->mb_intra : true if intra macroblock
1855    s->mv_dir   : motion vector direction
1856    s->mv_type  : motion vector type
1857    s->mv       : motion vector
1858    s->interlaced_dct : true if interlaced dct used (mpeg2)
1859  */
1860 static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
1861 {
1862     int mb_x, mb_y;
1863     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
1864 #ifdef HAVE_XVMC
1865     if(s->avctx->xvmc_acceleration){
1866         XVMC_decode_mb(s);//xvmc uses pblocks
1867         return;
1868     }
1869 #endif
1870
1871     mb_x = s->mb_x;
1872     mb_y = s->mb_y;
1873
1874     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
1875        /* save DCT coefficients */
1876        int i,j;
1877        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
1878        for(i=0; i<6; i++)
1879            for(j=0; j<64; j++)
1880                *dct++ = block[i][s->dsp.idct_permutation[j]];
1881     }
1882
1883     s->current_picture.qscale_table[mb_xy]= s->qscale;
1884
1885     /* update DC predictors for P macroblocks */
1886     if (!s->mb_intra) {
1887         if (s->h263_pred || s->h263_aic) {
1888             if(s->mbintra_table[mb_xy])
1889                 ff_clean_intra_table_entries(s);
1890         } else {
1891             s->last_dc[0] =
1892             s->last_dc[1] =
1893             s->last_dc[2] = 128 << s->intra_dc_precision;
1894         }
1895     }
1896     else if (s->h263_pred || s->h263_aic)
1897         s->mbintra_table[mb_xy]=1;
1898
1899     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
1900         uint8_t *dest_y, *dest_cb, *dest_cr;
1901         int dct_linesize, dct_offset;
1902         op_pixels_func (*op_pix)[4];
1903         qpel_mc_func (*op_qpix)[16];
1904         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
1905         const int uvlinesize= s->current_picture.linesize[1];
1906         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
1907         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
1908
1909         /* avoid copy if macroblock skipped in last frame too */
1910         /* skip only during decoding as we might trash the buffers during encoding a bit */
1911         if(!s->encoding){
1912             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
1913             const int age= s->current_picture.age;
1914
1915             assert(age);
1916
1917             if (s->mb_skipped) {
1918                 s->mb_skipped= 0;
1919                 assert(s->pict_type!=I_TYPE);
1920
1921                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
1922                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
1923
1924                 /* if previous was skipped too, then nothing to do !  */
1925                 if (*mbskip_ptr >= age && s->current_picture.reference){
1926                     return;
1927                 }
1928             } else if(!s->current_picture.reference){
1929                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
1930                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
1931             } else{
1932                 *mbskip_ptr = 0; /* not skipped */
1933             }
1934         }
1935
1936         dct_linesize = linesize << s->interlaced_dct;
1937         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
1938
1939         if(readable){
1940             dest_y=  s->dest[0];
1941             dest_cb= s->dest[1];
1942             dest_cr= s->dest[2];
1943         }else{
1944             dest_y = s->b_scratchpad;
1945             dest_cb= s->b_scratchpad+16*linesize;
1946             dest_cr= s->b_scratchpad+32*linesize;
1947         }
1948
1949         if (!s->mb_intra) {
1950             /* motion handling */
1951             /* decoding or more than one mb_type (MC was already done otherwise) */
1952             if(!s->encoding){
1953                 if(lowres_flag){
1954                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
1955
1956                     if (s->mv_dir & MV_DIR_FORWARD) {
1957                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
1958                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
1959                     }
1960                     if (s->mv_dir & MV_DIR_BACKWARD) {
1961                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
1962                     }
1963                 }else{
1964                     op_qpix= s->me.qpel_put;
1965                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
1966                         op_pix = s->dsp.put_pixels_tab;
1967                     }else{
1968                         op_pix = s->dsp.put_no_rnd_pixels_tab;
1969                     }
1970                     if (s->mv_dir & MV_DIR_FORWARD) {
1971                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
1972                         op_pix = s->dsp.avg_pixels_tab;
1973                         op_qpix= s->me.qpel_avg;
1974                     }
1975                     if (s->mv_dir & MV_DIR_BACKWARD) {
1976                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
1977                     }
1978                 }
1979             }
1980
1981             /* skip dequant / idct if we are really late ;) */
1982             if(s->hurry_up>1) goto skip_idct;
1983             if(s->avctx->skip_idct){
1984                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
1985                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
1986                    || s->avctx->skip_idct >= AVDISCARD_ALL)
1987                     goto skip_idct;
1988             }
1989
1990             /* add dct residue */
1991             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
1992                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
1993                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
1994                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
1995                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
1996                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
1997
1998                 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1999                     if (s->chroma_y_shift){
2000                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
2001                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
2002                     }else{
2003                         dct_linesize >>= 1;
2004                         dct_offset >>=1;
2005                         add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
2006                         add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
2007                         add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
2008                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
2009                     }
2010                 }
2011             } else if(s->codec_id != CODEC_ID_WMV2){
2012                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
2013                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
2014                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
2015                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
2016
2017                 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2018                     if(s->chroma_y_shift){//Chroma420
2019                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
2020                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
2021                     }else{
2022                         //chroma422
2023                         dct_linesize = uvlinesize << s->interlaced_dct;
2024                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
2025
2026                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
2027                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
2028                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
2029                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
2030                         if(!s->chroma_x_shift){//Chroma444
2031                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
2032                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
2033                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
2034                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
2035                         }
2036                     }
2037                 }//fi gray
2038             }
2039             else if (ENABLE_WMV2) {
2040                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
2041             }
2042         } else {
2043             /* dct only in intra block */
2044             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
2045                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
2046                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
2047                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
2048                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
2049
2050                 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2051                     if(s->chroma_y_shift){
2052                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
2053                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
2054                     }else{
2055                         dct_offset >>=1;
2056                         dct_linesize >>=1;
2057                         put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
2058                         put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
2059                         put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
2060                         put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
2061                     }
2062                 }
2063             }else{
2064                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
2065                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
2066                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
2067                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
2068
2069                 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2070                     if(s->chroma_y_shift){
2071                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
2072                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
2073                     }else{
2074
2075                         dct_linesize = uvlinesize << s->interlaced_dct;
2076                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
2077
2078                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
2079                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
2080                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
2081                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
2082                         if(!s->chroma_x_shift){//Chroma444
2083                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
2084                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
2085                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
2086                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
2087                         }
2088                     }
2089                 }//gray
2090             }
2091         }
2092 skip_idct:
2093         if(!readable){
2094             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
2095             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
2096             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
2097         }
2098     }
2099 }
2100
2101 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
2102     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
2103     else                  MPV_decode_mb_internal(s, block, 0);
2104 }
2105
2106 /**
2107  *
2108  * @param h is the normal height, this will be reduced automatically if needed for the last row
2109  */
2110 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2111     if (s->avctx->draw_horiz_band) {
2112         AVFrame *src;
2113         int offset[4];
2114
2115         if(s->picture_structure != PICT_FRAME){
2116             h <<= 1;
2117             y <<= 1;
2118             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
2119         }
2120
2121         h= FFMIN(h, s->avctx->height - y);
2122
2123         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
2124             src= (AVFrame*)s->current_picture_ptr;
2125         else if(s->last_picture_ptr)
2126             src= (AVFrame*)s->last_picture_ptr;
2127         else
2128             return;
2129
2130         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2131             offset[0]=
2132             offset[1]=
2133             offset[2]=
2134             offset[3]= 0;
2135         }else{
2136             offset[0]= y * s->linesize;;
2137             offset[1]=
2138             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
2139             offset[3]= 0;
2140         }
2141
2142         emms_c();
2143
2144         s->avctx->draw_horiz_band(s->avctx, src, offset,
2145                                   y, s->picture_structure, h);
2146     }
2147 }
2148
2149 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
2150     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2151     const int uvlinesize= s->current_picture.linesize[1];
2152     const int mb_size= 4 - s->avctx->lowres;
2153
2154     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
2155     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
2156     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
2157     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
2158     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2159     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2160     //block_index is not used by mpeg2, so it is not affected by chroma_format
2161
2162     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
2163     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2164     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2165
2166     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
2167     {
2168         s->dest[0] += s->mb_y *   linesize << mb_size;
2169         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2170         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2171     }
2172 }
2173
2174 void ff_mpeg_flush(AVCodecContext *avctx){
2175     int i;
2176     MpegEncContext *s = avctx->priv_data;
2177
2178     if(s==NULL || s->picture==NULL)
2179         return;
2180
2181     for(i=0; i<MAX_PICTURE_COUNT; i++){
2182        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
2183                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
2184         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
2185     }
2186     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
2187
2188     s->mb_x= s->mb_y= 0;
2189
2190     s->parse_context.state= -1;
2191     s->parse_context.frame_start_found= 0;
2192     s->parse_context.overread= 0;
2193     s->parse_context.overread_index= 0;
2194     s->parse_context.index= 0;
2195     s->parse_context.last_index= 0;
2196     s->bitstream_buffer_size=0;
2197     s->pp_time=0;
2198 }
2199
2200 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
2201                                    DCTELEM *block, int n, int qscale)
2202 {
2203     int i, level, nCoeffs;
2204     const uint16_t *quant_matrix;
2205
2206     nCoeffs= s->block_last_index[n];
2207
2208     if (n < 4)
2209         block[0] = block[0] * s->y_dc_scale;
2210     else
2211         block[0] = block[0] * s->c_dc_scale;
2212     /* XXX: only mpeg1 */
2213     quant_matrix = s->intra_matrix;
2214     for(i=1;i<=nCoeffs;i++) {
2215         int j= s->intra_scantable.permutated[i];
2216         level = block[j];
2217         if (level) {
2218             if (level < 0) {
2219                 level = -level;
2220                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2221                 level = (level - 1) | 1;
2222                 level = -level;
2223             } else {
2224                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2225                 level = (level - 1) | 1;
2226             }
2227             block[j] = level;
2228         }
2229     }
2230 }
2231
2232 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
2233                                    DCTELEM *block, int n, int qscale)
2234 {
2235     int i, level, nCoeffs;
2236     const uint16_t *quant_matrix;
2237
2238     nCoeffs= s->block_last_index[n];
2239
2240     quant_matrix = s->inter_matrix;
2241     for(i=0; i<=nCoeffs; i++) {
2242         int j= s->intra_scantable.permutated[i];
2243         level = block[j];
2244         if (level) {
2245             if (level < 0) {
2246                 level = -level;
2247                 level = (((level << 1) + 1) * qscale *
2248                          ((int) (quant_matrix[j]))) >> 4;
2249                 level = (level - 1) | 1;
2250                 level = -level;
2251             } else {
2252                 level = (((level << 1) + 1) * qscale *
2253                          ((int) (quant_matrix[j]))) >> 4;
2254                 level = (level - 1) | 1;
2255             }
2256             block[j] = level;
2257         }
2258     }
2259 }
2260
2261 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
2262                                    DCTELEM *block, int n, int qscale)
2263 {
2264     int i, level, nCoeffs;
2265     const uint16_t *quant_matrix;
2266
2267     if(s->alternate_scan) nCoeffs= 63;
2268     else nCoeffs= s->block_last_index[n];
2269
2270     if (n < 4)
2271         block[0] = block[0] * s->y_dc_scale;
2272     else
2273         block[0] = block[0] * s->c_dc_scale;
2274     quant_matrix = s->intra_matrix;
2275     for(i=1;i<=nCoeffs;i++) {
2276         int j= s->intra_scantable.permutated[i];
2277         level = block[j];
2278         if (level) {
2279             if (level < 0) {
2280                 level = -level;
2281                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2282                 level = -level;
2283             } else {
2284                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2285             }
2286             block[j] = level;
2287         }
2288     }
2289 }
2290
2291 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
2292                                    DCTELEM *block, int n, int qscale)
2293 {
2294     int i, level, nCoeffs;
2295     const uint16_t *quant_matrix;
2296     int sum=-1;
2297
2298     if(s->alternate_scan) nCoeffs= 63;
2299     else nCoeffs= s->block_last_index[n];
2300
2301     if (n < 4)
2302         block[0] = block[0] * s->y_dc_scale;
2303     else
2304         block[0] = block[0] * s->c_dc_scale;
2305     quant_matrix = s->intra_matrix;
2306     for(i=1;i<=nCoeffs;i++) {
2307         int j= s->intra_scantable.permutated[i];
2308         level = block[j];
2309         if (level) {
2310             if (level < 0) {
2311                 level = -level;
2312                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2313                 level = -level;
2314             } else {
2315                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2316             }
2317             block[j] = level;
2318             sum+=level;
2319         }
2320     }
2321     block[63]^=sum&1;
2322 }
2323
2324 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
2325                                    DCTELEM *block, int n, int qscale)
2326 {
2327     int i, level, nCoeffs;
2328     const uint16_t *quant_matrix;
2329     int sum=-1;
2330
2331     if(s->alternate_scan) nCoeffs= 63;
2332     else nCoeffs= s->block_last_index[n];
2333
2334     quant_matrix = s->inter_matrix;
2335     for(i=0; i<=nCoeffs; i++) {
2336         int j= s->intra_scantable.permutated[i];
2337         level = block[j];
2338         if (level) {
2339             if (level < 0) {
2340                 level = -level;
2341                 level = (((level << 1) + 1) * qscale *
2342                          ((int) (quant_matrix[j]))) >> 4;
2343                 level = -level;
2344             } else {
2345                 level = (((level << 1) + 1) * qscale *
2346                          ((int) (quant_matrix[j]))) >> 4;
2347             }
2348             block[j] = level;
2349             sum+=level;
2350         }
2351     }
2352     block[63]^=sum&1;
2353 }
2354
2355 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
2356                                   DCTELEM *block, int n, int qscale)
2357 {
2358     int i, level, qmul, qadd;
2359     int nCoeffs;
2360
2361     assert(s->block_last_index[n]>=0);
2362
2363     qmul = qscale << 1;
2364
2365     if (!s->h263_aic) {
2366         if (n < 4)
2367             block[0] = block[0] * s->y_dc_scale;
2368         else
2369             block[0] = block[0] * s->c_dc_scale;
2370         qadd = (qscale - 1) | 1;
2371     }else{
2372         qadd = 0;
2373     }
2374     if(s->ac_pred)
2375         nCoeffs=63;
2376     else
2377         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2378
2379     for(i=1; i<=nCoeffs; i++) {
2380         level = block[i];
2381         if (level) {
2382             if (level < 0) {
2383                 level = level * qmul - qadd;
2384             } else {
2385                 level = level * qmul + qadd;
2386             }
2387             block[i] = level;
2388         }
2389     }
2390 }
2391
2392 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
2393                                   DCTELEM *block, int n, int qscale)
2394 {
2395     int i, level, qmul, qadd;
2396     int nCoeffs;
2397
2398     assert(s->block_last_index[n]>=0);
2399
2400     qadd = (qscale - 1) | 1;
2401     qmul = qscale << 1;
2402
2403     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2404
2405     for(i=0; i<=nCoeffs; i++) {
2406         level = block[i];
2407         if (level) {
2408             if (level < 0) {
2409                 level = level * qmul - qadd;
2410             } else {
2411                 level = level * qmul + qadd;
2412             }
2413             block[i] = level;
2414         }
2415     }
2416 }
2417
2418 /**
2419  * set qscale and update qscale dependent variables.
2420  */
2421 void ff_set_qscale(MpegEncContext * s, int qscale)
2422 {
2423     if (qscale < 1)
2424         qscale = 1;
2425     else if (qscale > 31)
2426         qscale = 31;
2427
2428     s->qscale = qscale;
2429     s->chroma_qscale= s->chroma_qscale_table[qscale];
2430
2431     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
2432     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
2433 }