]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
Move DECODE_BYTES_PAD* macros before Doxygen comments.
[ffmpeg] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "avcodec.h"
32 #include "dsputil.h"
33 #include "internal.h"
34 #include "mpegvideo.h"
35 #include "mpegvideo_common.h"
36 #include "mjpegenc.h"
37 #include "msmpeg4.h"
38 #include "faandct.h"
39 #include "xvmc_internal.h"
40 #include <limits.h>
41
42 //#undef NDEBUG
43 //#include <assert.h>
44
45 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
46                                    DCTELEM *block, int n, int qscale);
47 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
48                                    DCTELEM *block, int n, int qscale);
49 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
50                                    DCTELEM *block, int n, int qscale);
51 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
52                                    DCTELEM *block, int n, int qscale);
53 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
54                                    DCTELEM *block, int n, int qscale);
55 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
56                                   DCTELEM *block, int n, int qscale);
57 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
58                                   DCTELEM *block, int n, int qscale);
59
60
61 /* enable all paranoid tests for rounding, overflows, etc... */
62 //#define PARANOID
63
64 //#define DEBUG
65
66
/* Identity mapping: chroma qscale equals luma qscale (used when the codec
 * defines no separate chroma quantiser table). Indexed by qscale 0..31. */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};

/* MPEG-1 DC coefficient scale: constant 8 for every qscale.
 * Sized 128 so it can be indexed by the larger qscale range of other codecs. */
const uint8_t ff_mpeg1_dc_scale_table[128]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
};

/* MPEG-2 intra_dc_precision == 1: DC scale 4. */
static const uint8_t mpeg2_dc_scale_table1[128]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};

/* MPEG-2 intra_dc_precision == 2: DC scale 2. */
static const uint8_t mpeg2_dc_scale_table2[128]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};

/* MPEG-2 intra_dc_precision == 3: DC scale 1 (full precision). */
static const uint8_t mpeg2_dc_scale_table3[128]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};

/* DC scale tables indexed by MPEG-2 intra_dc_precision (0..3);
 * precision 0 reuses the MPEG-1 table (scale 8). */
const uint8_t * const ff_mpeg2_dc_scale_table[4]={
    ff_mpeg1_dc_scale_table,
    mpeg2_dc_scale_table1,
    mpeg2_dc_scale_table2,
    mpeg2_dc_scale_table3,
};

/* PIX_FMT_NONE-terminated list: plain software YUV 4:2:0 only. */
const enum PixelFormat ff_pixfmt_list_420[] = {
    PIX_FMT_YUV420P,
    PIX_FMT_NONE
};

/* Same as above but offering hardware-accelerated surfaces first;
 * get_format() picks the first entry the application supports. */
const enum PixelFormat ff_hwaccel_pixfmt_list_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_YUV420P,
    PIX_FMT_NONE
};
122
/**
 * Scan [p, end) for an MPEG start code (byte sequence 0x00 0x00 0x01 xx).
 *
 * @param p     start of the buffer to scan
 * @param end   one past the last byte to scan
 * @param state rolling 32-bit big-endian window of the last bytes seen;
 *              persists across calls so start codes split over buffer
 *              boundaries are still found
 * @return pointer just past the found start code, or @p end if none found
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    // Feed up to 3 bytes through the carried-over state so a start code
    // straddling the previous buffer boundary is detected.
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        // tmp == 0x100 means the previous bytes were 0x00 0x00 0x01,
        // i.e. the byte just consumed completed a start code.
        if(tmp == 0x100 || p==end)
            return p;
    }

    // Fast skip-ahead: inspect the last 3 bytes of each 3-byte stride and
    // jump as far as the byte values allow (similar to Boyer-Moore).
    while(p<end){
        if     (p[-1] > 1      ) p+= 3; // p[-1] can't be part of 00 00 01
        else if(p[-2]          ) p+= 2;
        else if(p[-3]|(p[-1]-1)) p++;
        else{
            // found 0x00 0x00 0x01 at p[-3..-1]
            p++;
            break;
        }
    }

    // Record the last 4 bytes in *state for the next call and return the
    // position just past them (== just past the start code when found).
    p= FFMIN(p, end)-4;
    *state= AV_RB32(p);

    return p+4;
}
152
153 /* init common dct for both encoder and decoder */
/**
 * Initialise the DCT/unquantisation function pointers and scan tables
 * shared by encoder and decoder.
 *
 * Installs the C reference dct_unquantize_* implementations, lets the
 * per-architecture init hook override them with optimised versions, then
 * builds the (IDCT-permuted) scan tables.
 *
 * @return always 0
 */
av_cold int ff_dct_common_init(MpegEncContext *s)
{
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    // bitexact variant avoids the mismatch-control shortcut so output is
    // reproducible across platforms
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

    /* architecture-specific init may replace the C function pointers above */
#if   HAVE_MMX
    MPV_common_init_mmx(s);
#elif ARCH_ALPHA
    MPV_common_init_axp(s);
#elif CONFIG_MLIB
    MPV_common_init_mlib(s);
#elif HAVE_MMI
    MPV_common_init_mmi(s);
#elif ARCH_ARM
    MPV_common_init_arm(s);
#elif HAVE_ALTIVEC
    MPV_common_init_altivec(s);
#elif ARCH_BFIN
    MPV_common_init_bfin(s);
#endif

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
196
/**
 * Shallow-copy all fields of src into dst and mark dst as a copy
 * (FF_BUFFER_TYPE_COPY) so buffer-release logic knows dst does not own
 * the underlying frame buffer.
 */
void ff_copy_picture(Picture *dst, Picture *src){
    *dst = *src;
    dst->type= FF_BUFFER_TYPE_COPY;
}
201
202 /**
203  * Release a frame buffer
204  */
/**
 * Release a frame buffer: hand the picture back via the application's
 * release_buffer() callback, then free any hwaccel private data
 * attached to it.
 */
static void free_frame_buffer(MpegEncContext *s, Picture *pic)
{
    s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    av_freep(&pic->hwaccel_picture_private);
}
210
211 /**
212  * Allocate a frame buffer
213  */
/**
 * Allocate a frame buffer through the application's get_buffer()
 * callback and validate what came back.
 *
 * Also allocates the hwaccel private data if a hwaccel is active.
 *
 * @return 0 on success, -1 on failure (anything acquired here is freed)
 */
static int alloc_frame_buffer(MpegEncContext *s, Picture *pic)
{
    int r;

    if (s->avctx->hwaccel) {
        assert(!pic->hwaccel_picture_private);
        if (s->avctx->hwaccel->priv_data_size) {
            pic->hwaccel_picture_private = av_mallocz(s->avctx->hwaccel->priv_data_size);
            if (!pic->hwaccel_picture_private) {
                av_log(s->avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n");
                return -1;
            }
        }
    }

    r = s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

    // get_buffer() must fill in age, type and data[0]; reject anything else
    if (r<0 || !pic->age || !pic->type || !pic->data[0]) {
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
        av_freep(&pic->hwaccel_picture_private);
        return -1;
    }

    // Strides are cached context-wide; a buffer with different strides
    // than previous frames cannot be used.
    if (s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])) {
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
        free_frame_buffer(s, pic);
        return -1;
    }

    // The code elsewhere assumes both chroma planes share one stride.
    if (pic->linesize[1] != pic->linesize[2]) {
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
        free_frame_buffer(s, pic);
        return -1;
    }

    return 0;
}
251
252 /**
253  * allocates a Picture
254  * The pixels are allocated/set by calling get_buffer() if shared=0
255  */
256 int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){
257     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) does not sig11
258     const int mb_array_size= s->mb_stride*s->mb_height;
259     const int b8_array_size= s->b8_stride*s->mb_height*2;
260     const int b4_array_size= s->b4_stride*s->mb_height*4;
261     int i;
262     int r= -1;
263
264     if(shared){
265         assert(pic->data[0]);
266         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
267         pic->type= FF_BUFFER_TYPE_SHARED;
268     }else{
269         assert(!pic->data[0]);
270
271         if (alloc_frame_buffer(s, pic) < 0)
272             return -1;
273
274         s->linesize  = pic->linesize[0];
275         s->uvlinesize= pic->linesize[1];
276     }
277
278     if(pic->qscale_table==NULL){
279         if (s->encoding) {
280             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_var   , mb_array_size * sizeof(int16_t)  , fail)
281             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mc_mb_var, mb_array_size * sizeof(int16_t)  , fail)
282             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_mean  , mb_array_size * sizeof(int8_t )  , fail)
283         }
284
285         FF_ALLOCZ_OR_GOTO(s->avctx, pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2, fail) //the +2 is for the slice end check
286         FF_ALLOCZ_OR_GOTO(s->avctx, pic->qscale_table , mb_array_size * sizeof(uint8_t)  , fail)
287         FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_type_base , (big_mb_num + s->mb_stride) * sizeof(uint32_t), fail)
288         pic->mb_type= pic->mb_type_base + 2*s->mb_stride+1;
289         if(s->out_format == FMT_H264){
290             for(i=0; i<2; i++){
291                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t), fail)
292                 pic->motion_val[i]= pic->motion_val_base[i]+4;
293                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->ref_index[i], 4*mb_array_size * sizeof(uint8_t), fail)
294             }
295             pic->motion_subsample_log2= 2;
296         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
297             for(i=0; i<2; i++){
298                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t), fail)
299                 pic->motion_val[i]= pic->motion_val_base[i]+4;
300                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->ref_index[i], 4*mb_array_size * sizeof(uint8_t), fail)
301             }
302             pic->motion_subsample_log2= 3;
303         }
304         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
305             FF_ALLOCZ_OR_GOTO(s->avctx, pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6, fail)
306         }
307         pic->qstride= s->mb_stride;
308         FF_ALLOCZ_OR_GOTO(s->avctx, pic->pan_scan , 1 * sizeof(AVPanScan), fail)
309     }
310
311     /* It might be nicer if the application would keep track of these
312      * but it would require an API change. */
313     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
314     s->prev_pict_types[0]= s->dropable ? FF_B_TYPE : s->pict_type;
315     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == FF_B_TYPE)
316         pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.
317
318     return 0;
319 fail: //for the FF_ALLOCZ_OR_GOTO macro
320     if(r>=0)
321         free_frame_buffer(s, pic);
322     return -1;
323 }
324
325 /**
326  * deallocates a picture
327  */
328 static void free_picture(MpegEncContext *s, Picture *pic){
329     int i;
330
331     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
332         free_frame_buffer(s, pic);
333     }
334
335     av_freep(&pic->mb_var);
336     av_freep(&pic->mc_mb_var);
337     av_freep(&pic->mb_mean);
338     av_freep(&pic->mbskip_table);
339     av_freep(&pic->qscale_table);
340     av_freep(&pic->mb_type_base);
341     av_freep(&pic->dct_coeff);
342     av_freep(&pic->pan_scan);
343     pic->mb_type= NULL;
344     for(i=0; i<2; i++){
345         av_freep(&pic->motion_val_base[i]);
346         av_freep(&pic->ref_index[i]);
347     }
348
349     if(pic->type == FF_BUFFER_TYPE_SHARED){
350         for(i=0; i<4; i++){
351             pic->base[i]=
352             pic->data[i]= NULL;
353         }
354         pic->type= 0;
355     }
356 }
357
/**
 * Allocate the per-thread scratch buffers of a (possibly duplicated)
 * MpegEncContext. Called once per thread context, including the base
 * context itself (thread 0).
 *
 * @return 0 on success, -1 on allocation failure (partially allocated
 *         buffers are freed later through MPV_common_end())
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int y_size = s->b8_stride * (2 * s->mb_height + 1);
    int c_size = s->mb_stride * (s->mb_height + 1);
    int yc_size = y_size + 2 * c_size;
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    FF_ALLOCZ_OR_GOTO(s->avctx, s->allocated_edge_emu_buffer, (s->width+64)*2*21*2, fail); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;

     //FIXME should be linesize instead of s->width*2 but that is not known before get_buffer()
    FF_ALLOCZ_OR_GOTO(s->avctx, s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t), fail)
    // the scratchpads all alias one allocation; obmc is offset by 16
    s->me.temp=         s->me.scratchpad;
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        FF_ALLOCZ_OR_GOTO(s->avctx, s->me.map      , ME_MAP_SIZE*sizeof(uint32_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t), fail)
        if(s->avctx->noise_reduction){
            FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_error_sum, 2 * 64 * sizeof(int), fail)
        }
    }
    FF_ALLOCZ_OR_GOTO(s->avctx, s->blocks, 64*12*2 * sizeof(DCTELEM), fail)
    s->block= s->blocks[0];

    for(i=0;i<12;i++){
        s->pblocks[i] = &s->block[i];
    }

    // If the base context had ac_val tables (FMT_H263 path), give this
    // thread its own copy so threads do not race on AC prediction state.
    // NOTE(review): for thread 0 this context IS the base, so the pointer
    // allocated in MPV_common_init() appears to be overwritten here and
    // leaked — verify against MPV_common_init()/MPV_common_end() pairing.
    if (s->ac_val_base) {
        FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_val_base, yc_size * sizeof(int16_t) * 16, fail);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;
    }

    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
399
400 static void free_duplicate_context(MpegEncContext *s){
401     if(s==NULL) return;
402
403     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
404     av_freep(&s->me.scratchpad);
405     s->me.temp=
406     s->rd_scratchpad=
407     s->b_scratchpad=
408     s->obmc_scratchpad= NULL;
409
410     av_freep(&s->dct_error_sum);
411     av_freep(&s->me.map);
412     av_freep(&s->me.score_map);
413     av_freep(&s->blocks);
414     av_freep(&s->ac_val_base);
415     s->block= NULL;
416 }
417
418 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
419 #define COPY(a) bak->a= src->a
420     COPY(allocated_edge_emu_buffer);
421     COPY(edge_emu_buffer);
422     COPY(me.scratchpad);
423     COPY(me.temp);
424     COPY(rd_scratchpad);
425     COPY(b_scratchpad);
426     COPY(obmc_scratchpad);
427     COPY(me.map);
428     COPY(me.score_map);
429     COPY(blocks);
430     COPY(block);
431     COPY(start_mb_y);
432     COPY(end_mb_y);
433     COPY(me.map_generation);
434     COPY(pb);
435     COPY(dct_error_sum);
436     COPY(dct_count[0]);
437     COPY(dct_count[1]);
438     COPY(ac_val_base);
439     COPY(ac_val[0]);
440     COPY(ac_val[1]);
441     COPY(ac_val[2]);
442 #undef COPY
443 }
444
445 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
446     MpegEncContext bak;
447     int i;
448     //FIXME copy only needed parts
449 //START_TIMER
450     backup_duplicate_context(&bak, dst);
451     memcpy(dst, src, sizeof(MpegEncContext));
452     backup_duplicate_context(dst, &bak);
453     for(i=0;i<12;i++){
454         dst->pblocks[i] = &dst->block[i];
455     }
456 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
457 }
458
459 /**
460  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
461  * the changed fields will not depend upon the prior state of the MpegEncContext.
462  */
463 void MPV_common_defaults(MpegEncContext *s){
464     s->y_dc_scale_table=
465     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
466     s->chroma_qscale_table= ff_default_chroma_qscale_table;
467     s->progressive_frame= 1;
468     s->progressive_sequence= 1;
469     s->picture_structure= PICT_FRAME;
470
471     s->coded_picture_number = 0;
472     s->picture_number = 0;
473     s->input_picture_number = 0;
474
475     s->picture_in_gop_number = 0;
476
477     s->f_code = 1;
478     s->b_code = 1;
479 }
480
481 /**
482  * sets the given MpegEncContext to defaults for decoding.
483  * the changed fields will not depend upon the prior state of the MpegEncContext.
484  */
/**
 * Set the given MpegEncContext to defaults for decoding.
 * Currently identical to the common defaults; exists as a separate entry
 * point so decoder-only defaults can be added without touching encoders.
 */
void MPV_decode_defaults(MpegEncContext *s){
    MPV_common_defaults(s);
}
488
489 /**
490  * init common structure for both encoder and decoder.
491  * this assumes that some variables like width/height are already set
492  */
493 av_cold int MPV_common_init(MpegEncContext *s)
494 {
495     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y, threads;
496
497     if(s->codec_id == CODEC_ID_MPEG2VIDEO && !s->progressive_sequence)
498         s->mb_height = (s->height + 31) / 32 * 2;
499     else
500         s->mb_height = (s->height + 15) / 16;
501
502     if(s->avctx->pix_fmt == PIX_FMT_NONE){
503         av_log(s->avctx, AV_LOG_ERROR, "decoding to PIX_FMT_NONE is not supported.\n");
504         return -1;
505     }
506
507     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
508         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
509         return -1;
510     }
511
512     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
513         return -1;
514
515     dsputil_init(&s->dsp, s->avctx);
516     ff_dct_common_init(s);
517
518     s->flags= s->avctx->flags;
519     s->flags2= s->avctx->flags2;
520
521     s->mb_width  = (s->width  + 15) / 16;
522     s->mb_stride = s->mb_width + 1;
523     s->b8_stride = s->mb_width*2 + 1;
524     s->b4_stride = s->mb_width*4 + 1;
525     mb_array_size= s->mb_height * s->mb_stride;
526     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
527
528     /* set chroma shifts */
529     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
530                                                     &(s->chroma_y_shift) );
531
532     /* set default edge pos, will be overriden in decode_header if needed */
533     s->h_edge_pos= s->mb_width*16;
534     s->v_edge_pos= s->mb_height*16;
535
536     s->mb_num = s->mb_width * s->mb_height;
537
538     s->block_wrap[0]=
539     s->block_wrap[1]=
540     s->block_wrap[2]=
541     s->block_wrap[3]= s->b8_stride;
542     s->block_wrap[4]=
543     s->block_wrap[5]= s->mb_stride;
544
545     y_size = s->b8_stride * (2 * s->mb_height + 1);
546     c_size = s->mb_stride * (s->mb_height + 1);
547     yc_size = y_size + 2 * c_size;
548
549     /* convert fourcc to upper case */
550     s->codec_tag = ff_toupper4(s->avctx->codec_tag);
551
552     s->stream_codec_tag = ff_toupper4(s->avctx->stream_codec_tag);
553
554     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
555
556     FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_index2xy, (s->mb_num+1)*sizeof(int), fail) //error ressilience code looks cleaner with this
557     for(y=0; y<s->mb_height; y++){
558         for(x=0; x<s->mb_width; x++){
559             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
560         }
561     }
562     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
563
564     if (s->encoding) {
565         /* Allocate MV tables */
566         FF_ALLOCZ_OR_GOTO(s->avctx, s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t), fail)
567         FF_ALLOCZ_OR_GOTO(s->avctx, s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t), fail)
568         FF_ALLOCZ_OR_GOTO(s->avctx, s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t), fail)
569         FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t), fail)
570         FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t), fail)
571         FF_ALLOCZ_OR_GOTO(s->avctx, s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t), fail)
572         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
573         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
574         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
575         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
576         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
577         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
578
579         if(s->msmpeg4_version){
580             FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int), fail);
581         }
582         FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
583
584         /* Allocate MB type table */
585         FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_type  , mb_array_size * sizeof(uint16_t), fail) //needed for encoding
586
587         FF_ALLOCZ_OR_GOTO(s->avctx, s->lambda_table, mb_array_size * sizeof(int), fail)
588
589         FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix  , 64*32   * sizeof(int), fail)
590         FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix  , 64*32   * sizeof(int), fail)
591         FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t), fail)
592         FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t), fail)
593         FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*), fail)
594         FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*), fail)
595
596         if(s->avctx->noise_reduction){
597             FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset, 2 * 64 * sizeof(uint16_t), fail)
598         }
599     }
600     FF_ALLOCZ_OR_GOTO(s->avctx, s->picture, MAX_PICTURE_COUNT * sizeof(Picture), fail)
601     for(i = 0; i < MAX_PICTURE_COUNT; i++) {
602         avcodec_get_frame_defaults((AVFrame *)&s->picture[i]);
603     }
604
605     FF_ALLOCZ_OR_GOTO(s->avctx, s->error_status_table, mb_array_size*sizeof(uint8_t), fail)
606
607     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
608         /* interlaced direct mode decoding tables */
609             for(i=0; i<2; i++){
610                 int j, k;
611                 for(j=0; j<2; j++){
612                     for(k=0; k<2; k++){
613                         FF_ALLOCZ_OR_GOTO(s->avctx,    s->b_field_mv_table_base[i][j][k], mv_table_size * 2 * sizeof(int16_t), fail)
614                         s->b_field_mv_table[i][j][k] = s->b_field_mv_table_base[i][j][k] + s->mb_stride + 1;
615                     }
616                     FF_ALLOCZ_OR_GOTO(s->avctx, s->b_field_select_table [i][j], mb_array_size * 2 * sizeof(uint8_t), fail)
617                     FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_mv_table_base[i][j], mv_table_size * 2 * sizeof(int16_t), fail)
618                     s->p_field_mv_table[i][j] = s->p_field_mv_table_base[i][j]+ s->mb_stride + 1;
619                 }
620                 FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_select_table[i], mb_array_size * 2 * sizeof(uint8_t), fail)
621             }
622     }
623     if (s->out_format == FMT_H263) {
624         /* ac values */
625         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_val_base, yc_size * sizeof(int16_t) * 16, fail);
626         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
627         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
628         s->ac_val[2] = s->ac_val[1] + c_size;
629
630         /* cbp values */
631         FF_ALLOCZ_OR_GOTO(s->avctx, s->coded_block_base, y_size, fail);
632         s->coded_block= s->coded_block_base + s->b8_stride + 1;
633
634         /* cbp, ac_pred, pred_dir */
635         FF_ALLOCZ_OR_GOTO(s->avctx, s->cbp_table     , mb_array_size * sizeof(uint8_t), fail)
636         FF_ALLOCZ_OR_GOTO(s->avctx, s->pred_dir_table, mb_array_size * sizeof(uint8_t), fail)
637     }
638
639     if (s->h263_pred || s->h263_plus || !s->encoding) {
640         /* dc values */
641         //MN: we need these for error resilience of intra-frames
642         FF_ALLOCZ_OR_GOTO(s->avctx, s->dc_val_base, yc_size * sizeof(int16_t), fail);
643         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
644         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
645         s->dc_val[2] = s->dc_val[1] + c_size;
646         for(i=0;i<yc_size;i++)
647             s->dc_val_base[i] = 1024;
648     }
649
650     /* which mb is a intra block */
651     FF_ALLOCZ_OR_GOTO(s->avctx, s->mbintra_table, mb_array_size, fail);
652     memset(s->mbintra_table, 1, mb_array_size);
653
654     /* init macroblock skip table */
655     FF_ALLOCZ_OR_GOTO(s->avctx, s->mbskip_table, mb_array_size+2, fail);
656     //Note the +1 is for a quicker mpeg4 slice_end detection
657     FF_ALLOCZ_OR_GOTO(s->avctx, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE, fail);
658
659     s->parse_context.state= -1;
660     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
661        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
662        s->visualization_buffer[1] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
663        s->visualization_buffer[2] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
664     }
665
666     s->context_initialized = 1;
667
668     s->thread_context[0]= s;
669     threads = s->avctx->thread_count;
670
671     for(i=1; i<threads; i++){
672         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
673         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
674     }
675
676     for(i=0; i<threads; i++){
677         if(init_duplicate_context(s->thread_context[i], s) < 0)
678            goto fail;
679         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
680         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
681     }
682
683     return 0;
684  fail:
685     MPV_common_end(s);
686     return -1;
687 }
688
689 /* init common structure for both encoder and decoder */
/**
 * Free everything allocated by MPV_common_init() (and by later lazy
 * allocations such as the bitstream buffer). Safe to call on a
 * partially initialised context: all frees go through av_freep()
 * which tolerates NULL.
 */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* per-thread scratch buffers; thread_context[0] is s itself and is
     * only de-initialised, not freed */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* encoder MV tables: free the *_base allocations and clear the
     * offset aliases pointing into them */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* interlaced direct-mode tables */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    /* DC/AC prediction and coded-block state */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* release every Picture (frame buffers + side data) before freeing
     * the picture array itself */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
772
773 void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
774 {
775     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
776     uint8_t index_run[MAX_RUN+1];
777     int last, run, level, start, end, i;
778
779     /* If table is static, we can quit if rl->max_level[0] is not NULL */
780     if(static_store && rl->max_level[0])
781         return;
782
783     /* compute max_level[], max_run[] and index_run[] */
784     for(last=0;last<2;last++) {
785         if (last == 0) {
786             start = 0;
787             end = rl->last;
788         } else {
789             start = rl->last;
790             end = rl->n;
791         }
792
793         memset(max_level, 0, MAX_RUN + 1);
794         memset(max_run, 0, MAX_LEVEL + 1);
795         memset(index_run, rl->n, MAX_RUN + 1);
796         for(i=start;i<end;i++) {
797             run = rl->table_run[i];
798             level = rl->table_level[i];
799             if (index_run[run] == rl->n)
800                 index_run[run] = i;
801             if (level > max_level[run])
802                 max_level[run] = level;
803             if (run > max_run[level])
804                 max_run[level] = run;
805         }
806         if(static_store)
807             rl->max_level[last] = static_store[last];
808         else
809             rl->max_level[last] = av_malloc(MAX_RUN + 1);
810         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
811         if(static_store)
812             rl->max_run[last] = static_store[last] + MAX_RUN + 1;
813         else
814             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
815         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
816         if(static_store)
817             rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
818         else
819             rl->index_run[last] = av_malloc(MAX_RUN + 1);
820         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
821     }
822 }
823
824 void init_vlc_rl(RLTable *rl)
825 {
826     int i, q;
827
828     for(q=0; q<32; q++){
829         int qmul= q*2;
830         int qadd= (q-1)|1;
831
832         if(q==0){
833             qmul=1;
834             qadd=0;
835         }
836         for(i=0; i<rl->vlc.table_size; i++){
837             int code= rl->vlc.table[i][0];
838             int len = rl->vlc.table[i][1];
839             int level, run;
840
841             if(len==0){ // illegal code
842                 run= 66;
843                 level= MAX_LEVEL;
844             }else if(len<0){ //more bits needed
845                 run= 0;
846                 level= code;
847             }else{
848                 if(code==rl->n){ //esc
849                     run= 66;
850                     level= 0;
851                 }else{
852                     run=   rl->table_run  [code] + 1;
853                     level= rl->table_level[code] * qmul + qadd;
854                     if(code >= rl->last) run+=192;
855                 }
856             }
857             rl->rl_vlc[q][i].len= len;
858             rl->rl_vlc[q][i].level= level;
859             rl->rl_vlc[q][i].run= run;
860         }
861     }
862 }
863
864 int ff_find_unused_picture(MpegEncContext *s, int shared){
865     int i;
866
867     if(shared){
868         for(i=0; i<MAX_PICTURE_COUNT; i++){
869             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
870         }
871     }else{
872         for(i=0; i<MAX_PICTURE_COUNT; i++){
873             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
874         }
875         for(i=0; i<MAX_PICTURE_COUNT; i++){
876             if(s->picture[i].data[0]==NULL) return i;
877         }
878     }
879
880     av_log(s->avctx, AV_LOG_FATAL, "Internal error, picture buffer overflow\n");
881     /* We could return -1, but the codec would crash trying to draw into a
882      * non-existing frame anyway. This is safer than waiting for a random crash.
883      * Also the return of this is never useful, an encoder must only allocate
884      * as much as allowed in the specification. This has no relationship to how
885      * much libavcodec could allocate (and MAX_PICTURE_COUNT is always large
886      * enough for such valid streams).
887      * Plus, a decoder has to check stream validity and remove frames if too
888      * many reference frames are around. Waiting for "OOM" is not correct at
889      * all. Similarly, missing reference frames have to be replaced by
890      * interpolated/MC frames, anything else is a bug in the codec ...
891      */
892     abort();
893     return -1;
894 }
895
896 static void update_noise_reduction(MpegEncContext *s){
897     int intra, i;
898
899     for(intra=0; intra<2; intra++){
900         if(s->dct_count[intra] > (1<<16)){
901             for(i=0; i<64; i++){
902                 s->dct_error_sum[intra][i] >>=1;
903             }
904             s->dct_count[intra] >>= 1;
905         }
906
907         for(i=0; i<64; i++){
908             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
909         }
910     }
911 }
912
913 /**
914  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
915  */
916 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
917 {
918     int i;
919     Picture *pic;
920     s->mb_skipped = 0;
921
922     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
923
924     /* mark&release old frames */
925     if (s->pict_type != FF_B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
926       if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
927           free_frame_buffer(s, s->last_picture_ptr);
928
929         /* release forgotten pictures */
930         /* if(mpeg124/h263) */
931         if(!s->encoding){
932             for(i=0; i<MAX_PICTURE_COUNT; i++){
933                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
934                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
935                     free_frame_buffer(s, &s->picture[i]);
936                 }
937             }
938         }
939       }
940     }
941
942     if(!s->encoding){
943         /* release non reference frames */
944         for(i=0; i<MAX_PICTURE_COUNT; i++){
945             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
946                 free_frame_buffer(s, &s->picture[i]);
947             }
948         }
949
950         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
951             pic= s->current_picture_ptr; //we already have a unused image (maybe it was set before reading the header)
952         else{
953             i= ff_find_unused_picture(s, 0);
954             pic= &s->picture[i];
955         }
956
957         pic->reference= 0;
958         if (!s->dropable){
959             if (s->codec_id == CODEC_ID_H264)
960                 pic->reference = s->picture_structure;
961             else if (s->pict_type != FF_B_TYPE)
962                 pic->reference = 3;
963         }
964
965         pic->coded_picture_number= s->coded_picture_number++;
966
967         if(ff_alloc_picture(s, pic, 0) < 0)
968             return -1;
969
970         s->current_picture_ptr= pic;
971         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
972         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
973     }
974
975     s->current_picture_ptr->pict_type= s->pict_type;
976 //    if(s->flags && CODEC_FLAG_QSCALE)
977   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
978     s->current_picture_ptr->key_frame= s->pict_type == FF_I_TYPE;
979
980     ff_copy_picture(&s->current_picture, s->current_picture_ptr);
981
982     if (s->pict_type != FF_B_TYPE) {
983         s->last_picture_ptr= s->next_picture_ptr;
984         if(!s->dropable)
985             s->next_picture_ptr= s->current_picture_ptr;
986     }
987 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
988         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
989         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
990         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
991         s->pict_type, s->dropable);*/
992
993     if(s->codec_id != CODEC_ID_H264){
994         if((s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && s->pict_type!=FF_I_TYPE){
995             av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
996             /* Allocate a dummy frame */
997             i= ff_find_unused_picture(s, 0);
998             s->last_picture_ptr= &s->picture[i];
999             if(ff_alloc_picture(s, s->last_picture_ptr, 0) < 0)
1000                 return -1;
1001         }
1002         if((s->next_picture_ptr==NULL || s->next_picture_ptr->data[0]==NULL) && s->pict_type==FF_B_TYPE){
1003             /* Allocate a dummy frame */
1004             i= ff_find_unused_picture(s, 0);
1005             s->next_picture_ptr= &s->picture[i];
1006             if(ff_alloc_picture(s, s->next_picture_ptr, 0) < 0)
1007                 return -1;
1008         }
1009     }
1010
1011     if(s->last_picture_ptr) ff_copy_picture(&s->last_picture, s->last_picture_ptr);
1012     if(s->next_picture_ptr) ff_copy_picture(&s->next_picture, s->next_picture_ptr);
1013
1014     assert(s->pict_type == FF_I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1015
1016     if(s->picture_structure!=PICT_FRAME && s->out_format != FMT_H264){
1017         int i;
1018         for(i=0; i<4; i++){
1019             if(s->picture_structure == PICT_BOTTOM_FIELD){
1020                  s->current_picture.data[i] += s->current_picture.linesize[i];
1021             }
1022             s->current_picture.linesize[i] *= 2;
1023             s->last_picture.linesize[i] *=2;
1024             s->next_picture.linesize[i] *=2;
1025         }
1026     }
1027
1028     s->hurry_up= s->avctx->hurry_up;
1029     s->error_recognition= avctx->error_recognition;
1030
1031     /* set dequantizer, we can't do it during init as it might change for mpeg4
1032        and we can't do it in the header decode as init is not called for mpeg4 there yet */
1033     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1034         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1035         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1036     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1037         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1038         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1039     }else{
1040         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1041         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1042     }
1043
1044     if(s->dct_error_sum){
1045         assert(s->avctx->noise_reduction && s->encoding);
1046
1047         update_noise_reduction(s);
1048     }
1049
1050     if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration)
1051         return ff_xvmc_field_start(s, avctx);
1052
1053     return 0;
1054 }
1055
/**
 * Generic function for encode/decode called after a frame has been
 * coded/decoded: finishes hwaccel/XvMC rendering, draws edge padding on
 * reference pictures, updates the last-frame bookkeeping and (when encoding)
 * releases non-reference frames.
 */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
    //just to make sure that all data is rendered.
    if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){
        ff_xvmc_field_end(s);
    }else if(!s->avctx->hwaccel
       && !(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
       && s->unrestricted_mv
       && s->current_picture.reference
       && !s->intra_only
       && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            /* chroma planes get half the edge width at half the resolution */
            s->dsp.draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c();

    /* remember type/lambda of this frame for rate control of the next one */
    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=FF_B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                free_frame_buffer(s, &s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1108
1109 /**
1110  * draws an line from (ex, ey) -> (sx, sy).
1111  * @param w width of the image
1112  * @param h height of the image
1113  * @param stride stride/linesize of the image
1114  * @param color color of the arrow
1115  */
1116 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1117     int x, y, fr, f;
1118
1119     sx= av_clip(sx, 0, w-1);
1120     sy= av_clip(sy, 0, h-1);
1121     ex= av_clip(ex, 0, w-1);
1122     ey= av_clip(ey, 0, h-1);
1123
1124     buf[sy*stride + sx]+= color;
1125
1126     if(FFABS(ex - sx) > FFABS(ey - sy)){
1127         if(sx > ex){
1128             FFSWAP(int, sx, ex);
1129             FFSWAP(int, sy, ey);
1130         }
1131         buf+= sx + sy*stride;
1132         ex-= sx;
1133         f= ((ey-sy)<<16)/ex;
1134         for(x= 0; x <= ex; x++){
1135             y = (x*f)>>16;
1136             fr= (x*f)&0xFFFF;
1137             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1138             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1139         }
1140     }else{
1141         if(sy > ey){
1142             FFSWAP(int, sx, ex);
1143             FFSWAP(int, sy, ey);
1144         }
1145         buf+= sx + sy*stride;
1146         ey-= sy;
1147         if(ey) f= ((ex-sx)<<16)/ey;
1148         else   f= 0;
1149         for(y= 0; y <= ey; y++){
1150             x = (y*f)>>16;
1151             fr= (y*f)&0xFFFF;
1152             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1153             buf[y*stride + x+1]+= (color*         fr )>>16;
1154         }
1155     }
1156 }
1157
1158 /**
1159  * draws an arrow from (ex, ey) -> (sx, sy).
1160  * @param w width of the image
1161  * @param h height of the image
1162  * @param stride stride/linesize of the image
1163  * @param color color of the arrow
1164  */
1165 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1166     int dx,dy;
1167
1168     sx= av_clip(sx, -100, w+100);
1169     sy= av_clip(sy, -100, h+100);
1170     ex= av_clip(ex, -100, w+100);
1171     ey= av_clip(ey, -100, h+100);
1172
1173     dx= ex - sx;
1174     dy= ey - sy;
1175
1176     if(dx*dx + dy*dy > 3*3){
1177         int rx=  dx + dy;
1178         int ry= -dx + dy;
1179         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1180
1181         //FIXME subpixel accuracy
1182         rx= ROUNDED_DIV(rx*3<<4, length);
1183         ry= ROUNDED_DIV(ry*3<<4, length);
1184
1185         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1186         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1187     }
1188     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1189 }
1190
1191 /**
1192  * prints debuging info for the given picture.
1193  */
1194 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1195
1196     if(s->avctx->hwaccel || !pict || !pict->mb_type) return;
1197
1198     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1199         int x,y;
1200
1201         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1202         switch (pict->pict_type) {
1203             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1204             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1205             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1206             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1207             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1208             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1209         }
1210         for(y=0; y<s->mb_height; y++){
1211             for(x=0; x<s->mb_width; x++){
1212                 if(s->avctx->debug&FF_DEBUG_SKIP){
1213                     int count= s->mbskip_table[x + y*s->mb_stride];
1214                     if(count>9) count=9;
1215                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1216                 }
1217                 if(s->avctx->debug&FF_DEBUG_QP){
1218                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1219                 }
1220                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1221                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1222                     //Type & MV direction
1223                     if(IS_PCM(mb_type))
1224                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1225                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1226                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1227                     else if(IS_INTRA4x4(mb_type))
1228                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1229                     else if(IS_INTRA16x16(mb_type))
1230                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1231                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1232                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1233                     else if(IS_DIRECT(mb_type))
1234                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1235                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1236                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1237                     else if(IS_GMC(mb_type))
1238                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1239                     else if(IS_SKIP(mb_type))
1240                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1241                     else if(!USES_LIST(mb_type, 1))
1242                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1243                     else if(!USES_LIST(mb_type, 0))
1244                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1245                     else{
1246                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1247                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1248                     }
1249
1250                     //segmentation
1251                     if(IS_8X8(mb_type))
1252                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1253                     else if(IS_16X8(mb_type))
1254                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1255                     else if(IS_8X16(mb_type))
1256                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1257                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1258                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1259                     else
1260                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1261
1262
1263                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1264                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1265                     else
1266                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1267                 }
1268 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1269             }
1270             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1271         }
1272     }
1273
1274     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1275         const int shift= 1 + s->quarter_sample;
1276         int mb_y;
1277         uint8_t *ptr;
1278         int i;
1279         int h_chroma_shift, v_chroma_shift, block_height;
1280         const int width = s->avctx->width;
1281         const int height= s->avctx->height;
1282         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1283         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1284         s->low_delay=0; //needed to see the vectors without trashing the buffers
1285
1286         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1287         for(i=0; i<3; i++){
1288             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1289             pict->data[i]= s->visualization_buffer[i];
1290         }
1291         pict->type= FF_BUFFER_TYPE_COPY;
1292         ptr= pict->data[0];
1293         block_height = 16>>v_chroma_shift;
1294
1295         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1296             int mb_x;
1297             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1298                 const int mb_index= mb_x + mb_y*s->mb_stride;
1299                 if((s->avctx->debug_mv) && pict->motion_val){
1300                   int type;
1301                   for(type=0; type<3; type++){
1302                     int direction = 0;
1303                     switch (type) {
1304                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1305                                 continue;
1306                               direction = 0;
1307                               break;
1308                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1309                                 continue;
1310                               direction = 0;
1311                               break;
1312                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1313                                 continue;
1314                               direction = 1;
1315                               break;
1316                     }
1317                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1318                         continue;
1319
1320                     if(IS_8X8(pict->mb_type[mb_index])){
1321                       int i;
1322                       for(i=0; i<4; i++){
1323                         int sx= mb_x*16 + 4 + 8*(i&1);
1324                         int sy= mb_y*16 + 4 + 8*(i>>1);
1325                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1326                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1327                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1328                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1329                       }
1330                     }else if(IS_16X8(pict->mb_type[mb_index])){
1331                       int i;
1332                       for(i=0; i<2; i++){
1333                         int sx=mb_x*16 + 8;
1334                         int sy=mb_y*16 + 4 + 8*i;
1335                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1336                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1337                         int my=(pict->motion_val[direction][xy][1]>>shift);
1338
1339                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1340                             my*=2;
1341
1342                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1343                       }
1344                     }else if(IS_8X16(pict->mb_type[mb_index])){
1345                       int i;
1346                       for(i=0; i<2; i++){
1347                         int sx=mb_x*16 + 4 + 8*i;
1348                         int sy=mb_y*16 + 8;
1349                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1350                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1351                         int my=(pict->motion_val[direction][xy][1]>>shift);
1352
1353                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1354                             my*=2;
1355
1356                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1357                       }
1358                     }else{
1359                       int sx= mb_x*16 + 8;
1360                       int sy= mb_y*16 + 8;
1361                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1362                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1363                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1364                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1365                     }
1366                   }
1367                 }
1368                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1369                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1370                     int y;
1371                     for(y=0; y<block_height; y++){
1372                         *(uint64_t*)(pict->data[1] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[1])= c;
1373                         *(uint64_t*)(pict->data[2] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[2])= c;
1374                     }
1375                 }
1376                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1377                     int mb_type= pict->mb_type[mb_index];
1378                     uint64_t u,v;
1379                     int y;
1380 #define COLOR(theta, r)\
1381 u= (int)(128 + r*cos(theta*3.141592/180));\
1382 v= (int)(128 + r*sin(theta*3.141592/180));
1383
1384
1385                     u=v=128;
1386                     if(IS_PCM(mb_type)){
1387                         COLOR(120,48)
1388                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1389                         COLOR(30,48)
1390                     }else if(IS_INTRA4x4(mb_type)){
1391                         COLOR(90,48)
1392                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1393 //                        COLOR(120,48)
1394                     }else if(IS_DIRECT(mb_type)){
1395                         COLOR(150,48)
1396                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1397                         COLOR(170,48)
1398                     }else if(IS_GMC(mb_type)){
1399                         COLOR(190,48)
1400                     }else if(IS_SKIP(mb_type)){
1401 //                        COLOR(180,48)
1402                     }else if(!USES_LIST(mb_type, 1)){
1403                         COLOR(240,48)
1404                     }else if(!USES_LIST(mb_type, 0)){
1405                         COLOR(0,48)
1406                     }else{
1407                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1408                         COLOR(300,48)
1409                     }
1410
1411                     u*= 0x0101010101010101ULL;
1412                     v*= 0x0101010101010101ULL;
1413                     for(y=0; y<block_height; y++){
1414                         *(uint64_t*)(pict->data[1] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[1])= u;
1415                         *(uint64_t*)(pict->data[2] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[2])= v;
1416                     }
1417
1418                     //segmentation
1419                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1420                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1421                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1422                     }
1423                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1424                         for(y=0; y<16; y++)
1425                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1426                     }
1427                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
1428                         int dm= 1 << (mv_sample_log2-2);
1429                         for(i=0; i<4; i++){
1430                             int sx= mb_x*16 + 8*(i&1);
1431                             int sy= mb_y*16 + 8*(i>>1);
1432                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1433                             //FIXME bidir
1434                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
1435                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
1436                                 for(y=0; y<8; y++)
1437                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
1438                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
1439                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
1440                         }
1441                     }
1442
1443                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1444                         // hmm
1445                     }
1446                 }
1447                 s->mbskip_table[mb_index]=0;
1448             }
1449         }
1450     }
1451 }
1452
/**
 * Half-pel motion compensation for a single block at reduced ("lowres")
 * resolution.
 *
 * The motion vector keeps (lowres+1) fractional bits; the fractional part
 * is rescaled below and handed to the h264 chroma MC functions, which do
 * the bilinear interpolation.
 *
 * @param field_based 1 if src is one field of an interlaced picture
 *                    (halves the effective vertical edge position)
 * @param pix_op      table of h264 chroma MC functions (put or avg),
 *                    indexed by block size
 * @return 1 if the edge emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int op_index= FFMIN(lowres, 2);
    const int s_mask= (2<<lowres)-1; // mask for the subpel part of the MV

    int emu=0;
    int sx, sy;

    if(s->quarter_sample){
        motion_x/=2;
        motion_y/=2;
    }

    /* split the vector into integer and fractional parts */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    /* if the block reaches outside the picture, run it through the
       edge-replicating emulation buffer first */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    /* rescale the subpel offsets to the fixed scale of the MC functions */
    sx= (sx << 2) >> lowres;
    sy= (sy << 2) >> lowres;
    if(field_select)
        src += s->linesize;
    pix_op[op_index](dest, src, stride, h, sx, sy);
    return emu;
}
1495
/**
 * Apply one MPEG motion vector to the three components at reduced
 * ("lowres") resolution.
 *
 * @param field_based   1 when predicting a single field of an interlaced
 *                      picture
 * @param bottom_field  1 to write into the bottom-field lines of dest
 * @param field_select  1 to read from the bottom field of ref_picture
 * @param ref_picture   array[3] of pointers to the Y/Cb/Cr reference planes
 * @param pix_op        table of h264 chroma MC functions (put or avg)
 * @param h             height of the predicted area in lowres luma pixels
 * @param mb_y          macroblock row to use for source addressing (may
 *                      differ from s->mb_y for field motion)
 */
static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h, int mb_y)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int op_index= FFMIN(lowres, 2);
    const int block_s= 8>>lowres;
    const int s_mask= (2<<lowres)-1; // mask for the subpel part of the MV
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel will not work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split the luma vector into integer and fractional parts */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(   mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    /* derive the chroma vector; rounding rules differ per codec family */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y =    mb_y*block_s               + (my >> lowres);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(   mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the reference block reaches outside the picture, replicate the
       edges through the emulation buffer (luma, then both chroma planes
       packed behind it) */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale subpel offsets to the fixed scale of the MC functions */
    sx= (sx << 2) >> lowres;
    sy= (sy << 2) >> lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy); // NOTE(review): indexes pix_op[-1] if lowres==0 — presumably only reached with lowres >= 1; confirm at call sites

    if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        uvsx= (uvsx << 2) >> lowres;
        uvsy= (uvsy << 2) >> lowres;
        pix_op[op_index](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[op_index](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
1593
/**
 * Chroma motion compensation for an 8x8-partitioned (4MV) macroblock at
 * reduced resolution. The caller sums the four luma vectors; they are
 * collapsed into a single chroma vector here.
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int op_index= FFMIN(lowres, 2);
    const int block_s= 8>>lowres;
    const int s_mask= (2<<lowres)-1; // mask for the subpel part of the MV
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    /* split the vector into integer and fractional parts */
    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* go through the edge-replicating emulation buffer when the
           block reaches outside the picture */
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    /* rescale subpel offsets to the fixed scale of the MC functions */
    sx= (sx << 2) >> lowres;
    sy= (sy << 2) >> lowres;
    pix_op[op_index](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    ptr = ref_picture[2] + offset;
    if(emu){ /* Cr sits at the same offset, so it needs emulation iff Cb did */
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[op_index](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
1644
/**
 * motion compensation of a single macroblock, at reduced ("lowres")
 * resolution
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres; // size of an 8x8 block at this lowres level

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        /* one vector for the whole macroblock */
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s, mb_y);
        break;
    case MV_TYPE_8X8:
        /* four luma vectors; their sum yields the single chroma vector */
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s, mb_y);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s, mb_y);
        } else {
            /* field picture: a reference to the opposite parity of the
               current (second) field lies in the current frame */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != FF_B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s, mb_y>>1);
        }
        break;
    case MV_TYPE_16X8:
        /* two vectors, one per 16x8 half of the macroblock */
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == FF_B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s, mb_y>>1);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual prime: put the first prediction, then average the others on top */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s, mb_y);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s, mb_y>>1);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
1770
/* dequantize intra block[] and write (not add) its inverse transform to dest[] */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
1778
1779 /* add block[] to dest[] */
1780 static inline void add_dct(MpegEncContext *s,
1781                            DCTELEM *block, int i, uint8_t *dest, int line_size)
1782 {
1783     if (s->block_last_index[i] >= 0) {
1784         s->dsp.idct_add (dest, line_size, block);
1785     }
1786 }
1787
1788 static inline void add_dequant_dct(MpegEncContext *s,
1789                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1790 {
1791     if (s->block_last_index[i] >= 0) {
1792         s->dct_unquantize_inter(s, block, i, qscale);
1793
1794         s->dsp.idct_add (dest, line_size, block);
1795     }
1796 }
1797
1798 /**
1799  * cleans dc, ac, coded_block for the current non intra MB
1800  */
1801 void ff_clean_intra_table_entries(MpegEncContext *s)
1802 {
1803     int wrap = s->b8_stride;
1804     int xy = s->block_index[0];
1805
1806     s->dc_val[0][xy           ] =
1807     s->dc_val[0][xy + 1       ] =
1808     s->dc_val[0][xy     + wrap] =
1809     s->dc_val[0][xy + 1 + wrap] = 1024;
1810     /* ac pred */
1811     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
1812     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
1813     if (s->msmpeg4_version>=3) {
1814         s->coded_block[xy           ] =
1815         s->coded_block[xy + 1       ] =
1816         s->coded_block[xy     + wrap] =
1817         s->coded_block[xy + 1 + wrap] = 0;
1818     }
1819     /* chroma */
1820     wrap = s->mb_stride;
1821     xy = s->mb_x + s->mb_y * wrap;
1822     s->dc_val[1][xy] =
1823     s->dc_val[2][xy] = 1024;
1824     /* ac pred */
1825     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
1826     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
1827
1828     s->mbintra_table[xy]= 0;
1829 }
1830
/* generic function called after a macroblock has been parsed by the
   decoder or after it has been encoded by the encoder.

   Important variables used:
   s->mb_intra : true if intra macroblock
   s->mv_dir   : motion vector direction
   s->mv_type  : motion vector type
   s->mv       : motion vector
   s->interlaced_dct : true if interlaced dct used (mpeg2)

   lowres_flag and is_mpeg12 are literal constants at every call site
   (see MPV_decode_mb), so with av_always_inline the unused halves of
   this function are removed by constant propagation.
 */
static av_always_inline
void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
                            int lowres_flag, int is_mpeg12)
{
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
    if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){
        ff_xvmc_decode_mb(s);//xvmc uses pblocks
        return;
    }

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (!is_mpeg12 && (s->h263_pred || s->h263_aic)) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (!is_mpeg12 && (s->h263_pred || s->h263_aic))
        s->mbintra_table[mb_xy]=1;

    /* reconstruct the macroblock unless we are encoding and the pixels are
       not needed (no PSNR, non-reference frame, no RD mb decision) */
    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==FF_B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != FF_B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=FF_I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* interlaced DCT interleaves the two fields: double the stride and
           start the bottom blocks one line down */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* render into a scratch buffer; copied to s->dest below */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    op_qpix= s->me.qpel_put;
                    if ((!s->no_rounding) || s->pict_type==FF_B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->me.qpel_avg;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == FF_B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != FF_I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                /* codecs where dequantization still has to happen here */
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if (s->chroma_y_shift){
                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_linesize >>= 1;
                        dct_offset >>=1;
                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            } else if(is_mpeg12 || (s->codec_id != CODEC_ID_WMV2)){
                /* blocks are already dequantized by the parser */
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else if (CONFIG_WMV2_DECODER || CONFIG_WMV2_ENCODER) {
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_offset >>=1;
                        dct_linesize >>=1;
                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            }else{
                /* intra blocks already dequantized: plain idct_put */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        if(!readable){
            /* copy the scratch buffer into the real destination */
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
2077
2078 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
2079 #if !CONFIG_SMALL
2080     if(s->out_format == FMT_MPEG1) {
2081         if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 1);
2082         else                 MPV_decode_mb_internal(s, block, 0, 1);
2083     } else
2084 #endif
2085     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 0);
2086     else                  MPV_decode_mb_internal(s, block, 0, 0);
2087 }
2088
2089 /**
2090  *
2091  * @param h is the normal height, this will be reduced automatically if needed for the last row
2092  */
2093 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2094     if (s->avctx->draw_horiz_band) {
2095         AVFrame *src;
2096         const int field_pic= s->picture_structure != PICT_FRAME;
2097         int offset[4];
2098
2099         h= FFMIN(h, (s->avctx->height>>field_pic) - y);
2100
2101         if(field_pic && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)){
2102             h <<= 1;
2103             y <<= 1;
2104             if(s->first_field) return;
2105         }
2106
2107         if(s->pict_type==FF_B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
2108             src= (AVFrame*)s->current_picture_ptr;
2109         else if(s->last_picture_ptr)
2110             src= (AVFrame*)s->last_picture_ptr;
2111         else
2112             return;
2113
2114         if(s->pict_type==FF_B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2115             offset[0]=
2116             offset[1]=
2117             offset[2]=
2118             offset[3]= 0;
2119         }else{
2120             offset[0]= y * s->linesize;
2121             offset[1]=
2122             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
2123             offset[3]= 0;
2124         }
2125
2126         emms_c();
2127
2128         s->avctx->draw_horiz_band(s->avctx, src, offset,
2129                                   y, s->picture_structure, h);
2130     }
2131 }
2132
/* Set up s->block_index[] and the s->dest[] plane pointers for the start
   of a macroblock row; both are advanced per-MB elsewhere, so they are
   initialized one macroblock to the left (mb_x - 1). */
void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
    const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
    const int uvlinesize= s->current_picture.linesize[1];
    const int mb_size= 4 - s->avctx->lowres; // log2 of the macroblock size in pixels

    /* four luma 8x8 block indices (2x2 per macroblock), then one index per
       chroma plane, laid out after the luma area */
    s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
    s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
    s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
    s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
    s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    //block_index is not used by mpeg2, so it is not affected by chroma_format

    s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
    s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
    s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));

    /* advance dest[] to the current row, except for the B-frame +
       draw_horiz_band frame-picture case (handled elsewhere) */
    if(!(s->pict_type==FF_B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
    {
        if(s->picture_structure==PICT_FRAME){
        s->dest[0] += s->mb_y *   linesize << mb_size;
        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
        }else{
            /* field picture: two MB rows share one frame row */
            s->dest[0] += (s->mb_y>>1) *   linesize << mb_size;
            s->dest[1] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
            s->dest[2] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
            assert((s->mb_y&1) == (s->picture_structure == PICT_BOTTOM_FIELD));
        }
    }
}
2164
2165 void ff_mpeg_flush(AVCodecContext *avctx){
2166     int i;
2167     MpegEncContext *s = avctx->priv_data;
2168
2169     if(s==NULL || s->picture==NULL)
2170         return;
2171
2172     for(i=0; i<MAX_PICTURE_COUNT; i++){
2173        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
2174                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
2175         free_frame_buffer(s, &s->picture[i]);
2176     }
2177     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
2178
2179     s->mb_x= s->mb_y= 0;
2180     s->closed_gop= 0;
2181
2182     s->parse_context.state= -1;
2183     s->parse_context.frame_start_found= 0;
2184     s->parse_context.overread= 0;
2185     s->parse_context.overread_index= 0;
2186     s->parse_context.index= 0;
2187     s->parse_context.last_index= 0;
2188     s->bitstream_buffer_size=0;
2189     s->pp_time=0;
2190 }
2191
2192 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
2193                                    DCTELEM *block, int n, int qscale)
2194 {
2195     int i, level, nCoeffs;
2196     const uint16_t *quant_matrix;
2197
2198     nCoeffs= s->block_last_index[n];
2199
2200     if (n < 4)
2201         block[0] = block[0] * s->y_dc_scale;
2202     else
2203         block[0] = block[0] * s->c_dc_scale;
2204     /* XXX: only mpeg1 */
2205     quant_matrix = s->intra_matrix;
2206     for(i=1;i<=nCoeffs;i++) {
2207         int j= s->intra_scantable.permutated[i];
2208         level = block[j];
2209         if (level) {
2210             if (level < 0) {
2211                 level = -level;
2212                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2213                 level = (level - 1) | 1;
2214                 level = -level;
2215             } else {
2216                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2217                 level = (level - 1) | 1;
2218             }
2219             block[j] = level;
2220         }
2221     }
2222 }
2223
2224 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
2225                                    DCTELEM *block, int n, int qscale)
2226 {
2227     int i, level, nCoeffs;
2228     const uint16_t *quant_matrix;
2229
2230     nCoeffs= s->block_last_index[n];
2231
2232     quant_matrix = s->inter_matrix;
2233     for(i=0; i<=nCoeffs; i++) {
2234         int j= s->intra_scantable.permutated[i];
2235         level = block[j];
2236         if (level) {
2237             if (level < 0) {
2238                 level = -level;
2239                 level = (((level << 1) + 1) * qscale *
2240                          ((int) (quant_matrix[j]))) >> 4;
2241                 level = (level - 1) | 1;
2242                 level = -level;
2243             } else {
2244                 level = (((level << 1) + 1) * qscale *
2245                          ((int) (quant_matrix[j]))) >> 4;
2246                 level = (level - 1) | 1;
2247             }
2248             block[j] = level;
2249         }
2250     }
2251 }
2252
2253 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
2254                                    DCTELEM *block, int n, int qscale)
2255 {
2256     int i, level, nCoeffs;
2257     const uint16_t *quant_matrix;
2258
2259     if(s->alternate_scan) nCoeffs= 63;
2260     else nCoeffs= s->block_last_index[n];
2261
2262     if (n < 4)
2263         block[0] = block[0] * s->y_dc_scale;
2264     else
2265         block[0] = block[0] * s->c_dc_scale;
2266     quant_matrix = s->intra_matrix;
2267     for(i=1;i<=nCoeffs;i++) {
2268         int j= s->intra_scantable.permutated[i];
2269         level = block[j];
2270         if (level) {
2271             if (level < 0) {
2272                 level = -level;
2273                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2274                 level = -level;
2275             } else {
2276                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2277             }
2278             block[j] = level;
2279         }
2280     }
2281 }
2282
2283 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
2284                                    DCTELEM *block, int n, int qscale)
2285 {
2286     int i, level, nCoeffs;
2287     const uint16_t *quant_matrix;
2288     int sum=-1;
2289
2290     if(s->alternate_scan) nCoeffs= 63;
2291     else nCoeffs= s->block_last_index[n];
2292
2293     if (n < 4)
2294         block[0] = block[0] * s->y_dc_scale;
2295     else
2296         block[0] = block[0] * s->c_dc_scale;
2297     quant_matrix = s->intra_matrix;
2298     for(i=1;i<=nCoeffs;i++) {
2299         int j= s->intra_scantable.permutated[i];
2300         level = block[j];
2301         if (level) {
2302             if (level < 0) {
2303                 level = -level;
2304                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2305                 level = -level;
2306             } else {
2307                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2308             }
2309             block[j] = level;
2310             sum+=level;
2311         }
2312     }
2313     block[63]^=sum&1;
2314 }
2315
2316 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
2317                                    DCTELEM *block, int n, int qscale)
2318 {
2319     int i, level, nCoeffs;
2320     const uint16_t *quant_matrix;
2321     int sum=-1;
2322
2323     if(s->alternate_scan) nCoeffs= 63;
2324     else nCoeffs= s->block_last_index[n];
2325
2326     quant_matrix = s->inter_matrix;
2327     for(i=0; i<=nCoeffs; i++) {
2328         int j= s->intra_scantable.permutated[i];
2329         level = block[j];
2330         if (level) {
2331             if (level < 0) {
2332                 level = -level;
2333                 level = (((level << 1) + 1) * qscale *
2334                          ((int) (quant_matrix[j]))) >> 4;
2335                 level = -level;
2336             } else {
2337                 level = (((level << 1) + 1) * qscale *
2338                          ((int) (quant_matrix[j]))) >> 4;
2339             }
2340             block[j] = level;
2341             sum+=level;
2342         }
2343     }
2344     block[63]^=sum&1;
2345 }
2346
2347 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
2348                                   DCTELEM *block, int n, int qscale)
2349 {
2350     int i, level, qmul, qadd;
2351     int nCoeffs;
2352
2353     assert(s->block_last_index[n]>=0);
2354
2355     qmul = qscale << 1;
2356
2357     if (!s->h263_aic) {
2358         if (n < 4)
2359             block[0] = block[0] * s->y_dc_scale;
2360         else
2361             block[0] = block[0] * s->c_dc_scale;
2362         qadd = (qscale - 1) | 1;
2363     }else{
2364         qadd = 0;
2365     }
2366     if(s->ac_pred)
2367         nCoeffs=63;
2368     else
2369         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2370
2371     for(i=1; i<=nCoeffs; i++) {
2372         level = block[i];
2373         if (level) {
2374             if (level < 0) {
2375                 level = level * qmul - qadd;
2376             } else {
2377                 level = level * qmul + qadd;
2378             }
2379             block[i] = level;
2380         }
2381     }
2382 }
2383
2384 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
2385                                   DCTELEM *block, int n, int qscale)
2386 {
2387     int i, level, qmul, qadd;
2388     int nCoeffs;
2389
2390     assert(s->block_last_index[n]>=0);
2391
2392     qadd = (qscale - 1) | 1;
2393     qmul = qscale << 1;
2394
2395     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2396
2397     for(i=0; i<=nCoeffs; i++) {
2398         level = block[i];
2399         if (level) {
2400             if (level < 0) {
2401                 level = level * qmul - qadd;
2402             } else {
2403                 level = level * qmul + qadd;
2404             }
2405             block[i] = level;
2406         }
2407     }
2408 }
2409
2410 /**
2411  * set qscale and update qscale dependent variables.
2412  */
2413 void ff_set_qscale(MpegEncContext * s, int qscale)
2414 {
2415     if (qscale < 1)
2416         qscale = 1;
2417     else if (qscale > 31)
2418         qscale = 31;
2419
2420     s->qscale = qscale;
2421     s->chroma_qscale= s->chroma_qscale_table[qscale];
2422
2423     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
2424     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
2425 }