]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
fix mem leak
[ffmpeg] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "mpegvideo_common.h"
34 #include "mjpegenc.h"
35 #include "msmpeg4.h"
36 #include "faandct.h"
37 #include <limits.h>
38
39 //#undef NDEBUG
40 //#include <assert.h>
41
42 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
43                                    DCTELEM *block, int n, int qscale);
44 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
55                                   DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57
58 #ifdef HAVE_XVMC
59 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
60 extern void XVMC_field_end(MpegEncContext *s);
61 extern void XVMC_decode_mb(MpegEncContext *s);
62 #endif
63
64 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
65
66
67 /* enable all paranoid tests for rounding, overflows, etc... */
68 //#define PARANOID
69
70 //#define DEBUG
71
72
73 static const uint8_t ff_default_chroma_qscale_table[32]={
74 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
75     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
76 };
77
78 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
79     int i;
80     int end;
81
82     st->scantable= src_scantable;
83
84     for(i=0; i<64; i++){
85         int j;
86         j = src_scantable[i];
87         st->permutated[i] = permutation[j];
88 #ifdef ARCH_POWERPC
89         st->inverse[j] = i;
90 #endif
91     }
92
93     end=-1;
94     for(i=0; i<64; i++){
95         int j;
96         j = st->permutated[i];
97         if(j>end) end=j;
98         st->raster_end[i]= end;
99     }
100 }
101
/**
 * Scans [p, end) for an MPEG start code (00 00 01 xx).
 * *state carries the last bytes seen across calls, so a start code that
 * straddles two input buffers is still detected.
 * @return pointer just past the byte following the 000001 prefix (with
 *         *state == 0x000001xx), or end if no start code was found.
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* first check whether a start code straddles the previous buffer:
       tmp == 0x100 means the low 3 bytes of the old *state were 000001 */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* fast scan: step by up to 3 bytes depending on how much of the
       00 00 01 pattern the last bytes could still form */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3; // last byte >=2 can be neither the 00 nor the 01
        else if(p[-2]          ) p+= 2; // second-to-last byte is nonzero
        else if(p[-3]|(p[-1]-1)) p++;   // either p[-3]!=0 or p[-1]!=1
        else{                           // p[-3..-1] == 00 00 01: found
            p++;
            break;
        }
    }

    p= FFMIN(p, end)-4;
    *state= AV_RB32(p); // remember the last 4 bytes for the next call

    return p+4;
}
131
/**
 * Initializes the unquantize function pointers and scantables shared by
 * encoder and decoder.  Starts from the portable C implementations and lets
 * the platform-specific init override them with optimized versions.
 */
static int DCT_common_init(MpegEncContext *s)
{
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

    /* let the target architecture install its optimized implementations */
#if defined(HAVE_MMX)
    MPV_common_init_mmx(s);
#elif defined(ARCH_ALPHA)
    MPV_common_init_axp(s);
#elif defined(HAVE_MLIB)
    MPV_common_init_mlib(s);
#elif defined(HAVE_MMI)
    MPV_common_init_mmi(s);
#elif defined(ARCH_ARMV4L)
    MPV_common_init_armv4l(s);
#elif defined(ARCH_POWERPC)
    MPV_common_init_ppc(s);
#elif defined(ARCH_BFIN)
    MPV_common_init_bfin(s);
#endif

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
175
176 void copy_picture(Picture *dst, Picture *src){
177     *dst = *src;
178     dst->type= FF_BUFFER_TYPE_COPY;
179 }
180
/**
 * Allocates a Picture.
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 * On failure, a frame buffer obtained via get_buffer() is released again;
 * side-data tables that were allocated before the failure remain attached
 * to pic and are freed later by free_picture().
 * @return 0 on success, -1 on failure
 */
int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) does not sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;
    int r= -1; // get_buffer() result; stays -1 in the shared path

    if(shared){
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* the stride must not change over the lifetime of the context */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* allocate the side-data tables only on first use of this Picture slot */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2; // h264: 4x4 motion granularity
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3; // 8x8 motion granularity
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    /* It might be nicer if the application would keep track of these
     * but it would require an API change. */
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    /* release the frame buffer only if we actually obtained one above */
    if(r>=0)
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    return -1;
}
269
270 /**
271  * deallocates a picture
272  */
273 static void free_picture(MpegEncContext *s, Picture *pic){
274     int i;
275
276     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
277         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
278     }
279
280     av_freep(&pic->mb_var);
281     av_freep(&pic->mc_mb_var);
282     av_freep(&pic->mb_mean);
283     av_freep(&pic->mbskip_table);
284     av_freep(&pic->qscale_table);
285     av_freep(&pic->mb_type_base);
286     av_freep(&pic->dct_coeff);
287     av_freep(&pic->pan_scan);
288     pic->mb_type= NULL;
289     for(i=0; i<2; i++){
290         av_freep(&pic->motion_val_base[i]);
291         av_freep(&pic->ref_index[i]);
292     }
293
294     if(pic->type == FF_BUFFER_TYPE_SHARED){
295         for(i=0; i<4; i++){
296             pic->base[i]=
297             pic->data[i]= NULL;
298         }
299         pic->type= 0;
300     }
301 }
302
/**
 * Allocates the per-thread scratch buffers of a (possibly duplicated) context.
 * On failure the already-allocated buffers stay attached to s; they are
 * released later through MPV_common_end().
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;

     //FIXME should be linesize instead of s->width*2 but that is not known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the rd/b/obmc scratchpads all alias one allocation */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    // pblocks lets code address the 12 coefficient blocks individually
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
332
333 static void free_duplicate_context(MpegEncContext *s){
334     if(s==NULL) return;
335
336     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
337     av_freep(&s->me.scratchpad);
338     s->rd_scratchpad=
339     s->b_scratchpad=
340     s->obmc_scratchpad= NULL;
341
342     av_freep(&s->dct_error_sum);
343     av_freep(&s->me.map);
344     av_freep(&s->me.score_map);
345     av_freep(&s->blocks);
346     s->block= NULL;
347 }
348
349 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
350 #define COPY(a) bak->a= src->a
351     COPY(allocated_edge_emu_buffer);
352     COPY(edge_emu_buffer);
353     COPY(me.scratchpad);
354     COPY(rd_scratchpad);
355     COPY(b_scratchpad);
356     COPY(obmc_scratchpad);
357     COPY(me.map);
358     COPY(me.score_map);
359     COPY(blocks);
360     COPY(block);
361     COPY(start_mb_y);
362     COPY(end_mb_y);
363     COPY(me.map_generation);
364     COPY(pb);
365     COPY(dct_error_sum);
366     COPY(dct_count[0]);
367     COPY(dct_count[1]);
368 #undef COPY
369 }
370
371 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
372     MpegEncContext bak;
373     int i;
374     //FIXME copy only needed parts
375 //START_TIMER
376     backup_duplicate_context(&bak, dst);
377     memcpy(dst, src, sizeof(MpegEncContext));
378     backup_duplicate_context(dst, &bak);
379     for(i=0;i<12;i++){
380         dst->pblocks[i] = (short *)(&dst->block[i]);
381     }
382 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
383 }
384
385 /**
386  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
387  * the changed fields will not depend upon the prior state of the MpegEncContext.
388  */
389 void MPV_common_defaults(MpegEncContext *s){
390     s->y_dc_scale_table=
391     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
392     s->chroma_qscale_table= ff_default_chroma_qscale_table;
393     s->progressive_frame= 1;
394     s->progressive_sequence= 1;
395     s->picture_structure= PICT_FRAME;
396
397     s->coded_picture_number = 0;
398     s->picture_number = 0;
399     s->input_picture_number = 0;
400
401     s->picture_in_gop_number = 0;
402
403     s->f_code = 1;
404     s->b_code = 1;
405 }
406
/**
 * sets the given MpegEncContext to defaults for decoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 */
void MPV_decode_defaults(MpegEncContext *s){
    // decoding currently needs nothing beyond the common defaults
    MPV_common_defaults(s);
}
414
/**
 * init common structure for both encoder and decoder.
 * this assumes that some variables like width/height are already set.
 * On any allocation failure the whole context is torn down again via
 * MPV_common_end() and -1 is returned.
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;

    s->mb_height = (s->height + 15) / 16;

    /* more threads than macroblock rows cannot be used */
    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    DCT_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* derived geometry; the +1 in the strides leaves room for edge entries */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overridden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    /* luma blocks use the b8 stride, chroma blocks the mb stride */
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        /* the working pointers skip the edge row/column of the base tables */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding

        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))

        /* quantization matrices, per qscale (32 values) */
        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))

        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);

    s->parse_context.state= -1;
    /* NOTE(review): visualization buffers are av_malloc'd unchecked here;
       presumably visualization is best-effort debug-only — verify */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread 0 is the context itself; the others are full copies */
    s->thread_context[0]= s;
    for(i=1; i<s->avctx->thread_count; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    /* give each thread its own scratch buffers and macroblock-row range */
    for(i=0; i<s->avctx->thread_count; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
607
/**
 * Frees everything allocated by MPV_common_init() and alloc_picture().
 * Also serves as the error path of MPV_common_init(), so it must cope with
 * a partially initialized context.
 */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* per-thread scratch buffers; slot 0 is s itself, slots >=1 were malloced */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* encoder MV and MB tables */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* interlaced direct mode tables */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    /* h263/error-resilience side tables */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* free each picture's buffers and side data before the array itself */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
691
/**
 * Computes the derived run/level tables (max_level, max_run, index_run) of
 * an RLTable, separately for the not-last (last=0) and last (last=1) parts.
 * With static_store the three tables of each part are packed into one
 * static row (layout: max_level | max_run | index_run); otherwise they are
 * allocated with av_malloc().
 * NOTE(review): the av_malloc() results are not checked before memcpy —
 * presumably init-time OOM is considered fatal elsewhere; verify.
 */
void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
{
    int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
    uint8_t index_run[MAX_RUN+1];
    int last, run, level, start, end, i;

    /* If table is static, we can quit if rl->max_level[0] is not NULL */
    if(static_store && rl->max_level[0])
        return;

    /* compute max_level[], max_run[] and index_run[] */
    for(last=0;last<2;last++) {
        if (last == 0) {
            start = 0;
            end = rl->last;
        } else {
            start = rl->last;
            end = rl->n;
        }

        /* index_run entries equal to rl->n mean "no code for this run" */
        memset(max_level, 0, MAX_RUN + 1);
        memset(max_run, 0, MAX_LEVEL + 1);
        memset(index_run, rl->n, MAX_RUN + 1);
        for(i=start;i<end;i++) {
            run = rl->table_run[i];
            level = rl->table_level[i];
            if (index_run[run] == rl->n)
                index_run[run] = i;
            if (level > max_level[run])
                max_level[run] = level;
            if (run > max_run[level])
                max_run[level] = run;
        }
        if(static_store)
            rl->max_level[last] = static_store[last];
        else
            rl->max_level[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
        if(static_store)
            rl->max_run[last] = static_store[last] + MAX_RUN + 1;
        else
            rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
        memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
        if(static_store)
            rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
        else
            rl->index_run[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
    }
}
742
/**
 * Builds the per-qscale RL_VLC tables (rl->rl_vlc[q]) from an RLTable.
 * For each of the 32 qscales the table stores levels already scaled by
 * qmul/qadd, so the decoder can skip a separate dequantization step.
 * NOTE(review): the av_malloc()/av_mallocz_static() results are not checked
 * before being written to — verify callers treat init-time OOM as fatal.
 */
void init_vlc_rl(RLTable *rl, int use_static)
{
    int i, q;

    /* Return if static table is already initialized */
    if(use_static && rl->rl_vlc[0])
        return;

    init_vlc(&rl->vlc, 9, rl->n + 1,
             &rl->table_vlc[0][1], 4, 2,
             &rl->table_vlc[0][0], 4, 2, use_static);


    for(q=0; q<32; q++){
        int qmul= q*2;
        int qadd= (q-1)|1;

        if(q==0){ // qscale 0 stores unscaled levels
            qmul=1;
            qadd=0;
        }
        if(use_static)
            rl->rl_vlc[q]= av_mallocz_static(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
        else
            rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
        for(i=0; i<rl->vlc.table_size; i++){
            int code= rl->vlc.table[i][0];
            int len = rl->vlc.table[i][1];
            int level, run;

            if(len==0){ // illegal code
                run= 66;
                level= MAX_LEVEL;
            }else if(len<0){ //more bits needed
                run= 0;
                level= code;
            }else{
                if(code==rl->n){ //esc
                    run= 66;
                    level= 0;
                }else{
                    run=   rl->table_run  [code] + 1;
                    level= rl->table_level[code] * qmul + qadd;
                    if(code >= rl->last) run+=192; // mark "last" codes via the run value
                }
            }
            rl->rl_vlc[q][i].len= len;
            rl->rl_vlc[q][i].level= level;
            rl->rl_vlc[q][i].run= run;
        }
    }
}
795
/* Draws an edge border of width 'w' around an image of size width x height
   by replicating the outermost pixels (rows, columns, and corners).
   buf points at the top-left pixel of the image; the border area around it
   must be part of the same allocation. */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *last_line= buf + (height - 1) * wrap;
    int i;

    /* replicate the top and bottom rows outward */
    for(i=1; i<=w; i++){
        memcpy(buf       - i * wrap, buf,       width);
        memcpy(last_line + i * wrap, last_line, width);
    }

    /* replicate the left and right columns outward */
    for(i=0; i<height; i++){
        uint8_t *row= buf + i * wrap;
        memset(row - w,     row[0],       w);
        memset(row + width, row[width-1], w);
    }

    /* fill the four corner areas with the nearest corner pixel */
    for(i=1; i<=w; i++){
        memset(buf       - i * wrap - w,     buf[0],             w); /* top left */
        memset(buf       - i * wrap + width, buf[width-1],       w); /* top right */
        memset(last_line + i * wrap - w,     last_line[0],       w); /* bottom left */
        memset(last_line + i * wrap + width, last_line[width-1], w); /* bottom right */
    }
}
824
825 int ff_find_unused_picture(MpegEncContext *s, int shared){
826     int i;
827
828     if(shared){
829         for(i=0; i<MAX_PICTURE_COUNT; i++){
830             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
831         }
832     }else{
833         for(i=0; i<MAX_PICTURE_COUNT; i++){
834             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
835         }
836         for(i=0; i<MAX_PICTURE_COUNT; i++){
837             if(s->picture[i].data[0]==NULL) return i;
838         }
839     }
840
841     assert(0);
842     return -1;
843 }
844
845 static void update_noise_reduction(MpegEncContext *s){
846     int intra, i;
847
848     for(intra=0; intra<2; intra++){
849         if(s->dct_count[intra] > (1<<16)){
850             for(i=0; i<64; i++){
851                 s->dct_error_sum[intra][i] >>=1;
852             }
853             s->dct_count[intra] >>= 1;
854         }
855
856         for(i=0; i<64; i++){
857             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
858         }
859     }
860 }
861
/**
 * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
 *
 * Releases reference pictures that are no longer needed, (re)allocates the
 * current picture, updates the last/next reference pointers and selects the
 * dequantizer functions matching the codec.
 * @return 0 on success, -1 if allocating the current picture failed
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
      if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                /* a referenced picture that is neither last nor next should not exist */
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                }
            }
        }
      }
    }
alloc:
    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        /* reference==3 marks a kept reference frame, 0 a discardable one */
        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
                        && !s->dropable ? 3 : 0;

        pic->coded_picture_number= s->coded_picture_number++;

        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE)
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);

    /* shift the references: for non-B frames the current picture becomes
       the new "next" reference (unless it is dropable) */
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/

    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);

    /* non-I frame without a usable reference (e.g. stream starts mid-GOP):
       allocate another picture and decode anyway */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && !s->dropable){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: point at the selected field and double the strides */
    if(s->picture_structure!=PICT_FRAME){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            }
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }

    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init is not called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }

#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
988
/* generic function for encode/decode called after a frame has been coded/decoded
 * Draws the replicated picture edges (for unrestricted MV prediction),
 * records per-frame statistics and releases non-reference frames while
 * encoding. */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c();

    /* remember statistics of the previous frame (used e.g. by rate control) */
    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1039
1040 /**
1041  * draws an line from (ex, ey) -> (sx, sy).
1042  * @param w width of the image
1043  * @param h height of the image
1044  * @param stride stride/linesize of the image
1045  * @param color color of the arrow
1046  */
1047 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1048     int x, y, fr, f;
1049
1050     sx= av_clip(sx, 0, w-1);
1051     sy= av_clip(sy, 0, h-1);
1052     ex= av_clip(ex, 0, w-1);
1053     ey= av_clip(ey, 0, h-1);
1054
1055     buf[sy*stride + sx]+= color;
1056
1057     if(FFABS(ex - sx) > FFABS(ey - sy)){
1058         if(sx > ex){
1059             FFSWAP(int, sx, ex);
1060             FFSWAP(int, sy, ey);
1061         }
1062         buf+= sx + sy*stride;
1063         ex-= sx;
1064         f= ((ey-sy)<<16)/ex;
1065         for(x= 0; x <= ex; x++){
1066             y = (x*f)>>16;
1067             fr= (x*f)&0xFFFF;
1068             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1069             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1070         }
1071     }else{
1072         if(sy > ey){
1073             FFSWAP(int, sx, ex);
1074             FFSWAP(int, sy, ey);
1075         }
1076         buf+= sx + sy*stride;
1077         ey-= sy;
1078         if(ey) f= ((ex-sx)<<16)/ey;
1079         else   f= 0;
1080         for(y= 0; y <= ey; y++){
1081             x = (y*f)>>16;
1082             fr= (y*f)&0xFFFF;
1083             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1084             buf[y*stride + x+1]+= (color*         fr )>>16;;
1085         }
1086     }
1087 }
1088
/**
 * Draw an arrow from (ex, ey) to (sx, sy); the head sits at (sx, sy).
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int vx, vy;

    /* coarse clip only; draw_line() does the exact clipping */
    sx= av_clip(sx, -100, w+100);
    sy= av_clip(sy, -100, h+100);
    ex= av_clip(ex, -100, w+100);
    ey= av_clip(ey, -100, h+100);

    /* shaft of the arrow */
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);

    vx= ex - sx;
    vy= ey - sy;

    /* only add the two head strokes if the arrow is long enough */
    if(vx*vx + vy*vy > 3*3){
        int hx=  vx + vy;
        int hy= -vx + vy;
        int len= ff_sqrt((hx*hx + hy*hy)<<8);

        //FIXME subpixel accuracy
        hx= ROUNDED_DIV(hx*3<<4, len);
        hy= ROUNDED_DIV(hy*3<<4, len);

        draw_line(buf, sx, sy, sx + hx, sy + hy, w, h, stride, color);
        draw_line(buf, sx, sy, sx - hy, sy + hx, w, h, stride, color);
    }
}
1121
/**
 * Print debugging info for the given picture.
 * Depending on avctx->debug and avctx->debug_mv this logs per-macroblock
 * skip counts, qscale values and MB types as text, and/or draws motion
 * vectors, QP and MB-type visualizations into a copy of the picture.
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    /* textual per-macroblock debug output */
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9;
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* graphical visualization: drawn into a copy so the real buffers stay intact */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                /* motion vector visualization, one pass per vector type */
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                /* QP visualization: paint the chroma planes with the qscale */
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                /* MB type visualization: color-code the chroma planes per type */
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128;
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    u*= 0x0101010101010101ULL;
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
1382
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer, addressed as if the block started at (src_x, src_y)
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int y;
    int start_y, start_x, end_y, end_x;

    /* if the block lies entirely outside, shift it so it touches the edge */
    if (src_y >= h) {
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    } else if (src_y <= -block_h) {
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if (src_x >= w) {
        src  += w - 1 - src_x;
        src_x = w - 1;
    } else if (src_x <= -block_w) {
        src  += 1 - block_w - src_x;
        src_x = 1 - block_w;
    }

    /* part of the block that overlaps the picture (always non-empty after
       the clamping above) */
    start_y = -src_y > 0 ? -src_y : 0;
    start_x = -src_x > 0 ? -src_x : 0;
    end_y   = h - src_y < block_h ? h - src_y : block_h;
    end_x   = w - src_x < block_w ? w - src_x : block_w;

    /* copy the existing part row by row */
    for (y = start_y; y < end_y; y++)
        memcpy(buf + y*linesize + start_x, src + y*linesize + start_x, end_x - start_x);

    /* replicate the first copied row upwards... */
    for (y = 0; y < start_y; y++)
        memcpy(buf + y*linesize + start_x, buf + start_y*linesize + start_x, end_x - start_x);

    /* ...and the last copied row downwards */
    for (y = end_y; y < block_h; y++)
        memcpy(buf + y*linesize + start_x, buf + (end_y - 1)*linesize + start_x, end_x - start_x);

    /* replicate the leftmost/rightmost filled column sideways */
    for (y = 0; y < block_h; y++) {
        memset(buf + y*linesize, buf[start_x + y*linesize], start_x);
        memset(buf + y*linesize + end_x, buf[end_x - 1 + y*linesize], block_w - end_x);
    }
}
1453
/**
 * Half-pel motion compensation of one block in lowres mode.
 * Applies the motion vector to (src_x, src_y), falls back to
 * ff_emulated_edge_mc() when the block would read outside the valid picture
 * area, then calls the chroma MC function selected by the lowres factor.
 * NOTE(review): assumes pix_op is a table indexed by avctx->lowres — confirm
 * against callers.
 * @return 1 if the edge emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int s_mask= (2<<lowres)-1; // mask for the subpel fraction at this lowres level
    int emu=0;
    int sx, sy;

    if(s->quarter_sample){
        // reduce qpel vectors to half-pel precision
        motion_x/=2;
        motion_y/=2;
    }

    // split the vector into subpel fraction and integer displacement
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    // the unsigned compare also catches negative positions
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    // bring the subpel fraction into the 0..7 range pix_op takes
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    if(field_select)
        src += s->linesize;
    pix_op[lowres](dest, src, stride, h, sx, sy);
    return emu;
}
1495
/**
 * Apply one mpeg motion vector to the three components (lowres variant).
 * @param field_based    1 if the vector refers to a single field
 * @param bottom_field   1 to write the bottom field of the destination
 * @param field_select   which field of the reference to read from
 * @param ref_picture    array[3] of pointers to the 3 planes of the reference
 * @param pix_op         h264-chroma-style MC functions; note they are used for
 *                       the luma plane too (pix_op[lowres-1] below)
 * @param h              height of the predicted area, in lowres luma lines
 */
static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;        // block size at this lowres level
    const int s_mask= (2<<lowres)-1;     // subpel bits kept at this lowres level
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split the vector into integer position (src_x/src_y) and subpel phase (sx/sy) */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    /* chroma position depends on the codec family's chroma MV convention */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the read area sticks out of the picture, copy it into the edge
       emulation buffer with replicated borders and read from there instead */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale the subpel phase to the 1/8-pel range the MC functions expect */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
1592
/**
 * Chroma motion compensation for an 8x8-partitioned (4MV) macroblock,
 * lowres variant. mx/my is the sum of the four luma vectors
 * (see MPV_motion_lowres, MV_TYPE_8X8 case).
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;       // chroma block size at this lowres level
    const int s_mask= (2<<lowres)-1;    // subpel bits kept at this lowres level
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    /* integer position plus subpel phase */
    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* out-of-picture reads go through the edge emulation buffer */
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;   /* remember so Cr below takes the same path */
        }
    }
    /* rescale the subpel phase to the 1/8-pel range the MC functions expect */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
1642
/**
 * motion compensation of a single macroblock (lowres variant)
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        /* one vector for the whole macroblock */
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        /* four luma vectors; chroma uses their rounded sum (4MV) */
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* field picture referencing the opposite-parity field of the
               current frame instead of the previous reference */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        /* two vectors, each covering a 16x8 half of the macroblock */
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual prime: predictions from both parities are averaged */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
1768
1769 /* put block[] to dest[] */
1770 static inline void put_dct(MpegEncContext *s,
1771                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1772 {
1773     s->dct_unquantize_intra(s, block, i, qscale);
1774     s->dsp.idct_put (dest, line_size, block);
1775 }
1776
1777 /* add block[] to dest[] */
1778 static inline void add_dct(MpegEncContext *s,
1779                            DCTELEM *block, int i, uint8_t *dest, int line_size)
1780 {
1781     if (s->block_last_index[i] >= 0) {
1782         s->dsp.idct_add (dest, line_size, block);
1783     }
1784 }
1785
1786 static inline void add_dequant_dct(MpegEncContext *s,
1787                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1788 {
1789     if (s->block_last_index[i] >= 0) {
1790         s->dct_unquantize_inter(s, block, i, qscale);
1791
1792         s->dsp.idct_add (dest, line_size, block);
1793     }
1794 }
1795
/**
 * cleans dc, ac, coded_block for the current non intra MB:
 * resets the luma and chroma DC predictors to 1024, zeroes the AC
 * prediction values, and (for msmpeg4 v3+) clears the coded_block bits.
 */
void ff_clean_intra_table_entries(MpegEncContext *s)
{
    int wrap = s->b8_stride;
    int xy = s->block_index[0];

    /* luma: the four 8x8 blocks of this macroblock */
    s->dc_val[0][xy           ] =
    s->dc_val[0][xy + 1       ] =
    s->dc_val[0][xy     + wrap] =
    s->dc_val[0][xy + 1 + wrap] = 1024;
    /* ac pred */
    memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
    memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
    if (s->msmpeg4_version>=3) {
        s->coded_block[xy           ] =
        s->coded_block[xy + 1       ] =
        s->coded_block[xy     + wrap] =
        s->coded_block[xy + 1 + wrap] = 0;
    }
    /* chroma: one entry per macroblock, so switch to the mb-sized stride */
    wrap = s->mb_stride;
    xy = s->mb_x + s->mb_y * wrap;
    s->dc_val[1][xy] =
    s->dc_val[2][xy] = 1024;
    /* ac pred */
    memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
    memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));

    /* mark this MB as no longer intra */
    s->mbintra_table[xy]= 0;
}
1828
/* generic function called after a macroblock has been parsed by the
   decoder or after it has been encoded by the encoder.

   Important variables used:
   s->mb_intra : true if intra macroblock
   s->mv_dir   : motion vector direction
   s->mv_type  : motion vector type
   s->mv       : motion vector
   s->interlaced_dct : true if interlaced dct used (mpeg2)

   lowres_flag is a compile-time constant at each call site (the function is
   av_always_inline) so the two variants are specialized separately. */
static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* with interlaced DCT the 8x8 blocks cover alternating lines,
           so the per-block row stride doubles and the vertical offset
           between the top and bottom luma blocks shrinks to one line */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* non-readable B-frame destination: render into the scratchpad
               and copy to s->dest at the end (see skip_idct below) */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    op_qpix= s->me.qpel_put;
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->me.qpel_avg;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if (s->chroma_y_shift){
                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_linesize >>= 1;
                        dct_offset >>=1;
                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                /* blocks were already dequantized by the bitstream parser */
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else if (ENABLE_WMV2) {
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_offset >>=1;
                        dct_linesize >>=1;
                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            }else{
                /* mpeg1/2: blocks are already dequantized, idct_put directly */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        /* if we rendered into the scratchpad, copy the result out now */
        if(!readable){
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
2079
2080 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
2081     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
2082     else                  MPV_decode_mb_internal(s, block, 0);
2083 }
2084
2085 /**
2086  *
2087  * @param h is the normal height, this will be reduced automatically if needed for the last row
2088  */
2089 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2090     if (s->avctx->draw_horiz_band) {
2091         AVFrame *src;
2092         int offset[4];
2093
2094         if(s->picture_structure != PICT_FRAME){
2095             h <<= 1;
2096             y <<= 1;
2097             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
2098         }
2099
2100         h= FFMIN(h, s->avctx->height - y);
2101
2102         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
2103             src= (AVFrame*)s->current_picture_ptr;
2104         else if(s->last_picture_ptr)
2105             src= (AVFrame*)s->last_picture_ptr;
2106         else
2107             return;
2108
2109         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2110             offset[0]=
2111             offset[1]=
2112             offset[2]=
2113             offset[3]= 0;
2114         }else{
2115             offset[0]= y * s->linesize;;
2116             offset[1]=
2117             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
2118             offset[3]= 0;
2119         }
2120
2121         emms_c();
2122
2123         s->avctx->draw_horiz_band(s->avctx, src, offset,
2124                                   y, s->picture_structure, h);
2125     }
2126 }
2127
/* Set up s->block_index[] and the s->dest[] plane pointers for the start of
   the current macroblock row; indices point one MB left of mb_x so that
   advancing by the MB width lands on the current macroblock. */
void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
    const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
    const int uvlinesize= s->current_picture.linesize[1];
    const int mb_size= 4 - s->avctx->lowres;   /* log2 of the MB size at this lowres level */

    /* luma: four 8x8 block indices in b8_stride units; chroma: one index per
       MB, stored after the luma area (offset by b8_stride*mb_height*2) */
    s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
    s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
    s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
    s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
    s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    //block_index is not used by mpeg2, so it is not affected by chroma_format

    s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
    s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
    s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));

    if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
    {
        s->dest[0] += s->mb_y *   linesize << mb_size;
        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
    }
}
2152
/* Flush the decoder state: release every internally- or user-allocated
   picture buffer, drop the reference-picture pointers and reset the parse
   context and bitstream buffer so decoding can restart cleanly (e.g. after
   a seek). */
void ff_mpeg_flush(AVCodecContext *avctx){
    int i;
    MpegEncContext *s = avctx->priv_data;

    if(s==NULL || s->picture==NULL)
        return;

    /* give back only buffers we obtained via get_buffer (internal/user type) */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
       if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
                                    || s->picture[i].type == FF_BUFFER_TYPE_USER))
        avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
    }
    s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;

    s->mb_x= s->mb_y= 0;

    /* reset the start-code parser and any buffered bitstream data */
    s->parse_context.state= -1;
    s->parse_context.frame_start_found= 0;
    s->parse_context.overread= 0;
    s->parse_context.overread_index= 0;
    s->parse_context.index= 0;
    s->parse_context.last_index= 0;
    s->bitstream_buffer_size=0;
    s->pp_time=0;
}
2178
2179 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
2180                                    DCTELEM *block, int n, int qscale)
2181 {
2182     int i, level, nCoeffs;
2183     const uint16_t *quant_matrix;
2184
2185     nCoeffs= s->block_last_index[n];
2186
2187     if (n < 4)
2188         block[0] = block[0] * s->y_dc_scale;
2189     else
2190         block[0] = block[0] * s->c_dc_scale;
2191     /* XXX: only mpeg1 */
2192     quant_matrix = s->intra_matrix;
2193     for(i=1;i<=nCoeffs;i++) {
2194         int j= s->intra_scantable.permutated[i];
2195         level = block[j];
2196         if (level) {
2197             if (level < 0) {
2198                 level = -level;
2199                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2200                 level = (level - 1) | 1;
2201                 level = -level;
2202             } else {
2203                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2204                 level = (level - 1) | 1;
2205             }
2206             block[j] = level;
2207         }
2208     }
2209 }
2210
2211 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
2212                                    DCTELEM *block, int n, int qscale)
2213 {
2214     int i, level, nCoeffs;
2215     const uint16_t *quant_matrix;
2216
2217     nCoeffs= s->block_last_index[n];
2218
2219     quant_matrix = s->inter_matrix;
2220     for(i=0; i<=nCoeffs; i++) {
2221         int j= s->intra_scantable.permutated[i];
2222         level = block[j];
2223         if (level) {
2224             if (level < 0) {
2225                 level = -level;
2226                 level = (((level << 1) + 1) * qscale *
2227                          ((int) (quant_matrix[j]))) >> 4;
2228                 level = (level - 1) | 1;
2229                 level = -level;
2230             } else {
2231                 level = (((level << 1) + 1) * qscale *
2232                          ((int) (quant_matrix[j]))) >> 4;
2233                 level = (level - 1) | 1;
2234             }
2235             block[j] = level;
2236         }
2237     }
2238 }
2239
2240 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
2241                                    DCTELEM *block, int n, int qscale)
2242 {
2243     int i, level, nCoeffs;
2244     const uint16_t *quant_matrix;
2245
2246     if(s->alternate_scan) nCoeffs= 63;
2247     else nCoeffs= s->block_last_index[n];
2248
2249     if (n < 4)
2250         block[0] = block[0] * s->y_dc_scale;
2251     else
2252         block[0] = block[0] * s->c_dc_scale;
2253     quant_matrix = s->intra_matrix;
2254     for(i=1;i<=nCoeffs;i++) {
2255         int j= s->intra_scantable.permutated[i];
2256         level = block[j];
2257         if (level) {
2258             if (level < 0) {
2259                 level = -level;
2260                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2261                 level = -level;
2262             } else {
2263                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2264             }
2265             block[j] = level;
2266         }
2267     }
2268 }
2269
2270 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
2271                                    DCTELEM *block, int n, int qscale)
2272 {
2273     int i, level, nCoeffs;
2274     const uint16_t *quant_matrix;
2275     int sum=-1;
2276
2277     if(s->alternate_scan) nCoeffs= 63;
2278     else nCoeffs= s->block_last_index[n];
2279
2280     if (n < 4)
2281         block[0] = block[0] * s->y_dc_scale;
2282     else
2283         block[0] = block[0] * s->c_dc_scale;
2284     quant_matrix = s->intra_matrix;
2285     for(i=1;i<=nCoeffs;i++) {
2286         int j= s->intra_scantable.permutated[i];
2287         level = block[j];
2288         if (level) {
2289             if (level < 0) {
2290                 level = -level;
2291                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2292                 level = -level;
2293             } else {
2294                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2295             }
2296             block[j] = level;
2297             sum+=level;
2298         }
2299     }
2300     block[63]^=sum&1;
2301 }
2302
2303 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
2304                                    DCTELEM *block, int n, int qscale)
2305 {
2306     int i, level, nCoeffs;
2307     const uint16_t *quant_matrix;
2308     int sum=-1;
2309
2310     if(s->alternate_scan) nCoeffs= 63;
2311     else nCoeffs= s->block_last_index[n];
2312
2313     quant_matrix = s->inter_matrix;
2314     for(i=0; i<=nCoeffs; i++) {
2315         int j= s->intra_scantable.permutated[i];
2316         level = block[j];
2317         if (level) {
2318             if (level < 0) {
2319                 level = -level;
2320                 level = (((level << 1) + 1) * qscale *
2321                          ((int) (quant_matrix[j]))) >> 4;
2322                 level = -level;
2323             } else {
2324                 level = (((level << 1) + 1) * qscale *
2325                          ((int) (quant_matrix[j]))) >> 4;
2326             }
2327             block[j] = level;
2328             sum+=level;
2329         }
2330     }
2331     block[63]^=sum&1;
2332 }
2333
2334 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
2335                                   DCTELEM *block, int n, int qscale)
2336 {
2337     int i, level, qmul, qadd;
2338     int nCoeffs;
2339
2340     assert(s->block_last_index[n]>=0);
2341
2342     qmul = qscale << 1;
2343
2344     if (!s->h263_aic) {
2345         if (n < 4)
2346             block[0] = block[0] * s->y_dc_scale;
2347         else
2348             block[0] = block[0] * s->c_dc_scale;
2349         qadd = (qscale - 1) | 1;
2350     }else{
2351         qadd = 0;
2352     }
2353     if(s->ac_pred)
2354         nCoeffs=63;
2355     else
2356         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2357
2358     for(i=1; i<=nCoeffs; i++) {
2359         level = block[i];
2360         if (level) {
2361             if (level < 0) {
2362                 level = level * qmul - qadd;
2363             } else {
2364                 level = level * qmul + qadd;
2365             }
2366             block[i] = level;
2367         }
2368     }
2369 }
2370
2371 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
2372                                   DCTELEM *block, int n, int qscale)
2373 {
2374     int i, level, qmul, qadd;
2375     int nCoeffs;
2376
2377     assert(s->block_last_index[n]>=0);
2378
2379     qadd = (qscale - 1) | 1;
2380     qmul = qscale << 1;
2381
2382     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2383
2384     for(i=0; i<=nCoeffs; i++) {
2385         level = block[i];
2386         if (level) {
2387             if (level < 0) {
2388                 level = level * qmul - qadd;
2389             } else {
2390                 level = level * qmul + qadd;
2391             }
2392             block[i] = level;
2393         }
2394     }
2395 }
2396
2397 /**
2398  * set qscale and update qscale dependent variables.
2399  */
2400 void ff_set_qscale(MpegEncContext * s, int qscale)
2401 {
2402     if (qscale < 1)
2403         qscale = 1;
2404     else if (qscale > 31)
2405         qscale = 31;
2406
2407     s->qscale = qscale;
2408     s->chroma_qscale= s->chroma_qscale_table[qscale];
2409
2410     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
2411     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
2412 }