]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
f4e80bafe1159337265896480d37e829adab6b6a
[ffmpeg] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavcore/imgutils.h"
32 #include "avcodec.h"
33 #include "dsputil.h"
34 #include "internal.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "mjpegenc.h"
38 #include "msmpeg4.h"
39 #include "faandct.h"
40 #include "xvmc_internal.h"
41 #include <limits.h>
42
43 //#undef NDEBUG
44 //#include <assert.h>
45
/* Forward declarations for the per-standard dequantization routines
 * (defined later in this file); ff_dct_common_init() installs them as
 * function pointers on the MpegEncContext. */
static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
60
61
62 /* enable all paranoid tests for rounding, overflows, etc... */
63 //#define PARANOID
64
65 //#define DEBUG
66
67
/* Default luma->chroma qscale mapping: the identity. Codecs that need a
 * non-linear mapping presumably install their own table instead. */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
72
/* MPEG-1 style DC scale: a constant 8 for every qscale value.
 * Also serves as entry 0 of ff_mpeg2_dc_scale_table below. */
const uint8_t ff_mpeg1_dc_scale_table[128]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
};
80
/* Constant DC scale of 4; entry 1 of ff_mpeg2_dc_scale_table below. */
static const uint8_t mpeg2_dc_scale_table1[128]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};
88
/* Constant DC scale of 2; entry 2 of ff_mpeg2_dc_scale_table below. */
static const uint8_t mpeg2_dc_scale_table2[128]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
96
/* Constant DC scale of 1; entry 3 of ff_mpeg2_dc_scale_table below. */
static const uint8_t mpeg2_dc_scale_table3[128]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
104
/* DC scale tables giving divisors 8/4/2/1; presumably indexed by the
 * MPEG-2 intra_dc_precision field (0..3) — confirm at the call sites. */
const uint8_t * const ff_mpeg2_dc_scale_table[4]={
    ff_mpeg1_dc_scale_table,
    mpeg2_dc_scale_table1,
    mpeg2_dc_scale_table2,
    mpeg2_dc_scale_table3,
};
111
/* PIX_FMT_NONE-terminated list: plain software YUV 4:2:0 only. */
const enum PixelFormat ff_pixfmt_list_420[] = {
    PIX_FMT_YUV420P,
    PIX_FMT_NONE
};
116
/* PIX_FMT_NONE-terminated list with hardware-accelerated surface formats
 * listed first, falling back to software YUV 4:2:0. */
const enum PixelFormat ff_hwaccel_pixfmt_list_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_YUV420P,
    PIX_FMT_NONE
};
123
/**
 * Scan [p, end) for an MPEG start code (00 00 01 xx), resumable across
 * buffer boundaries: *state carries the most recent bytes seen, MSB first.
 * @return pointer just past the xx byte of the start code, or end if no
 *         complete start code was found in this buffer.
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* First finish any start code straddling the previous buffer by
     * feeding up to 3 bytes through *state one at a time. */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* Fast scan: inspect bytes behind p and skip as many positions as the
     * byte values rule out; exits with p just past a 00 00 01 triple. */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3;
        else if(p[-2]          ) p+= 2;
        else if(p[-3]|(p[-1]-1)) p++;
        else{
            p++;
            break;
        }
    }

    /* Reload *state with the last 4 bytes consumed so a subsequent call
     * can resume a partial match. */
    p= FFMIN(p, end)-4;
    *state= AV_RB32(p);

    return p+4;
}
153
/**
 * Init the DCT-related function pointers and scantables shared by
 * encoder and decoder. Safe to call on a partially set-up context;
 * reads s->flags and s->alternate_scan.
 * @return 0 (never fails)
 */
av_cold int ff_dct_common_init(MpegEncContext *s)
{
    /* Install the portable C dequantizers first; the arch-specific init
     * below may override some of these with optimized versions. */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

    /* At most one platform-specific initializer is compiled in. */
#if   HAVE_MMX
    MPV_common_init_mmx(s);
#elif ARCH_ALPHA
    MPV_common_init_axp(s);
#elif CONFIG_MLIB
    MPV_common_init_mlib(s);
#elif HAVE_MMI
    MPV_common_init_mmi(s);
#elif ARCH_ARM
    MPV_common_init_arm(s);
#elif HAVE_ALTIVEC
    MPV_common_init_altivec(s);
#elif ARCH_BFIN
    MPV_common_init_bfin(s);
#endif

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
197
198 void ff_copy_picture(Picture *dst, Picture *src){
199     *dst = *src;
200     dst->type= FF_BUFFER_TYPE_COPY;
201 }
202
/**
 * Release a frame buffer: hand the pixel buffers back via the
 * release_buffer() callback and free any hwaccel-private data that
 * alloc_frame_buffer() attached to the picture.
 */
static void free_frame_buffer(MpegEncContext *s, Picture *pic)
{
    s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    av_freep(&pic->hwaccel_picture_private);
}
211
/**
 * Allocate a frame buffer via the get_buffer() callback and validate
 * the result.
 * @return 0 on success, -1 on failure (any partial allocation is
 *         cleaned up before returning).
 */
static int alloc_frame_buffer(MpegEncContext *s, Picture *pic)
{
    int r;

    /* Hardware acceleration may need per-picture private data; allocate
     * it before get_buffer() so the hwaccel can use it from the start. */
    if (s->avctx->hwaccel) {
        assert(!pic->hwaccel_picture_private);
        if (s->avctx->hwaccel->priv_data_size) {
            pic->hwaccel_picture_private = av_mallocz(s->avctx->hwaccel->priv_data_size);
            if (!pic->hwaccel_picture_private) {
                av_log(s->avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n");
                return -1;
            }
        }
    }

    r = s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

    /* Reject buffers the callback filled in incorrectly. */
    if (r<0 || !pic->age || !pic->type || !pic->data[0]) {
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
        av_freep(&pic->hwaccel_picture_private);
        return -1;
    }

    /* Strides must not change mid-stream relative to what the context
     * already recorded. */
    if (s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])) {
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
        free_frame_buffer(s, pic);
        return -1;
    }

    /* Both chroma planes must share one stride. */
    if (pic->linesize[1] != pic->linesize[2]) {
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
        free_frame_buffer(s, pic);
        return -1;
    }

    return 0;
}
252
253 /**
254  * allocates a Picture
255  * The pixels are allocated/set by calling get_buffer() if shared=0
256  */
257 int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){
258     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) does not sig11
259     const int mb_array_size= s->mb_stride*s->mb_height;
260     const int b8_array_size= s->b8_stride*s->mb_height*2;
261     const int b4_array_size= s->b4_stride*s->mb_height*4;
262     int i;
263     int r= -1;
264
265     if(shared){
266         assert(pic->data[0]);
267         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
268         pic->type= FF_BUFFER_TYPE_SHARED;
269     }else{
270         assert(!pic->data[0]);
271
272         if (alloc_frame_buffer(s, pic) < 0)
273             return -1;
274
275         s->linesize  = pic->linesize[0];
276         s->uvlinesize= pic->linesize[1];
277     }
278
279     if(pic->qscale_table==NULL){
280         if (s->encoding) {
281             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_var   , mb_array_size * sizeof(int16_t)  , fail)
282             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mc_mb_var, mb_array_size * sizeof(int16_t)  , fail)
283             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_mean  , mb_array_size * sizeof(int8_t )  , fail)
284         }
285
286         FF_ALLOCZ_OR_GOTO(s->avctx, pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2, fail) //the +2 is for the slice end check
287         FF_ALLOCZ_OR_GOTO(s->avctx, pic->qscale_table , mb_array_size * sizeof(uint8_t)  , fail)
288         FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_type_base , (big_mb_num + s->mb_stride) * sizeof(uint32_t), fail)
289         pic->mb_type= pic->mb_type_base + 2*s->mb_stride+1;
290         if(s->out_format == FMT_H264){
291             for(i=0; i<2; i++){
292                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t), fail)
293                 pic->motion_val[i]= pic->motion_val_base[i]+4;
294                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->ref_index[i], 4*mb_array_size * sizeof(uint8_t), fail)
295             }
296             pic->motion_subsample_log2= 2;
297         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
298             for(i=0; i<2; i++){
299                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t), fail)
300                 pic->motion_val[i]= pic->motion_val_base[i]+4;
301                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->ref_index[i], 4*mb_array_size * sizeof(uint8_t), fail)
302             }
303             pic->motion_subsample_log2= 3;
304         }
305         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
306             FF_ALLOCZ_OR_GOTO(s->avctx, pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6, fail)
307         }
308         pic->qstride= s->mb_stride;
309         FF_ALLOCZ_OR_GOTO(s->avctx, pic->pan_scan , 1 * sizeof(AVPanScan), fail)
310     }
311
312     /* It might be nicer if the application would keep track of these
313      * but it would require an API change. */
314     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
315     s->prev_pict_types[0]= s->dropable ? FF_B_TYPE : s->pict_type;
316     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == FF_B_TYPE)
317         pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.
318
319     return 0;
320 fail: //for the FF_ALLOCZ_OR_GOTO macro
321     if(r>=0)
322         free_frame_buffer(s, pic);
323     return -1;
324 }
325
/**
 * Deallocate a picture: release the frame buffer (unless it is a shared
 * buffer owned by the caller) and free all per-MB side-data tables.
 */
static void free_picture(MpegEncContext *s, Picture *pic){
    int i;

    /* Shared buffers are not ours to release. */
    if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
        free_frame_buffer(s, pic);
    }

    av_freep(&pic->mb_var);
    av_freep(&pic->mc_mb_var);
    av_freep(&pic->mb_mean);
    av_freep(&pic->mbskip_table);
    av_freep(&pic->qscale_table);
    av_freep(&pic->mb_type_base);
    av_freep(&pic->dct_coeff);
    av_freep(&pic->pan_scan);
    pic->mb_type= NULL;  // pointed into mb_type_base, which is gone now
    for(i=0; i<2; i++){
        av_freep(&pic->motion_val_base[i]);
        av_freep(&pic->ref_index[i]);
    }

    /* For shared pictures only the pointers were borrowed; clear them so
     * nothing dangles. */
    if(pic->type == FF_BUFFER_TYPE_SHARED){
        for(i=0; i<4; i++){
            pic->base[i]=
            pic->data[i]= NULL;
        }
        pic->type= 0;
    }
}
358
/**
 * Allocate the per-thread scratch buffers of a (possibly duplicated)
 * context. On failure nothing is freed here; MPV_common_end() on the
 * base context is expected to clean up.
 * @return 0 on success, -1 on allocation failure
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int y_size = s->b8_stride * (2 * s->mb_height + 1);
    int c_size = s->mb_stride * (s->mb_height + 1);
    int yc_size = y_size + 2 * c_size;
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    FF_ALLOCZ_OR_GOTO(s->avctx, s->allocated_edge_emu_buffer, (s->width+64)*2*21*2, fail); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;

     //FIXME should be linesize instead of s->width*2 but that is not known before get_buffer()
    FF_ALLOCZ_OR_GOTO(s->avctx, s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t), fail)
    /* The scratchpads intentionally alias one allocation; they are never
     * needed at the same time. */
    s->me.temp=         s->me.scratchpad;
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        FF_ALLOCZ_OR_GOTO(s->avctx, s->me.map      , ME_MAP_SIZE*sizeof(uint32_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t), fail)
        if(s->avctx->noise_reduction){
            FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_error_sum, 2 * 64 * sizeof(int), fail)
        }
    }
    FF_ALLOCZ_OR_GOTO(s->avctx, s->blocks, 64*12*2 * sizeof(DCTELEM), fail)
    s->block= s->blocks[0];

    for(i=0;i<12;i++){
        s->pblocks[i] = &s->block[i];
    }

    if (s->out_format == FMT_H263) {
        /* ac values */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_val_base, yc_size * sizeof(int16_t) * 16, fail);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;
    }

    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
401
402 static void free_duplicate_context(MpegEncContext *s){
403     if(s==NULL) return;
404
405     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
406     av_freep(&s->me.scratchpad);
407     s->me.temp=
408     s->rd_scratchpad=
409     s->b_scratchpad=
410     s->obmc_scratchpad= NULL;
411
412     av_freep(&s->dct_error_sum);
413     av_freep(&s->me.map);
414     av_freep(&s->me.score_map);
415     av_freep(&s->blocks);
416     av_freep(&s->ac_val_base);
417     s->block= NULL;
418 }
419
/* Copy the per-thread fields from src into bak. Used by
 * ff_update_duplicate_context() to preserve a thread context's private
 * buffers/state across a wholesale memcpy of the shared state. */
static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
#define COPY(a) bak->a= src->a
    COPY(allocated_edge_emu_buffer);
    COPY(edge_emu_buffer);
    COPY(me.scratchpad);
    COPY(me.temp);
    COPY(rd_scratchpad);
    COPY(b_scratchpad);
    COPY(obmc_scratchpad);
    COPY(me.map);
    COPY(me.score_map);
    COPY(blocks);
    COPY(block);
    COPY(start_mb_y);
    COPY(end_mb_y);
    COPY(me.map_generation);
    COPY(pb);
    COPY(dct_error_sum);
    COPY(dct_count[0]);
    COPY(dct_count[1]);
    COPY(ac_val_base);
    COPY(ac_val[0]);
    COPY(ac_val[1]);
    COPY(ac_val[2]);
#undef COPY
}
446
/* Refresh a thread context from src: copy the whole struct, then restore
 * dst's private per-thread fields (saved via backup_duplicate_context)
 * and re-point pblocks into dst's own block array. */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    /* pblocks still pointed into src's blocks after the memcpy; fix up. */
    for(i=0;i<12;i++){
        dst->pblocks[i] = &dst->block[i];
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
460
461 /**
462  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
463  * the changed fields will not depend upon the prior state of the MpegEncContext.
464  */
465 void MPV_common_defaults(MpegEncContext *s){
466     s->y_dc_scale_table=
467     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
468     s->chroma_qscale_table= ff_default_chroma_qscale_table;
469     s->progressive_frame= 1;
470     s->progressive_sequence= 1;
471     s->picture_structure= PICT_FRAME;
472
473     s->coded_picture_number = 0;
474     s->picture_number = 0;
475     s->input_picture_number = 0;
476
477     s->picture_in_gop_number = 0;
478
479     s->f_code = 1;
480     s->b_code = 1;
481 }
482
/**
 * Set the given MpegEncContext to defaults for decoding.
 * The changed fields will not depend upon the prior state of the
 * MpegEncContext. Currently identical to the common defaults; kept
 * separate so decoder-only defaults can be added without touching
 * the encoder path.
 */
void MPV_decode_defaults(MpegEncContext *s){
    MPV_common_defaults(s);
}
490
/**
 * Init the common structure for both encoder and decoder.
 * This assumes that some variables like width/height are already set.
 * Allocates all shared tables plus one duplicate context per thread.
 * @return 0 on success, -1 on error (everything allocated so far is
 *         freed via MPV_common_end() before returning).
 */
av_cold int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y, threads;

    /* Interlaced MPEG-2 needs an even MB height rounded up from 32-line
     * units so both fields are covered. */
    if(s->codec_id == CODEC_ID_MPEG2VIDEO && !s->progressive_sequence)
        s->mb_height = (s->height + 31) / 32 * 2;
    else
        s->mb_height = (s->height + 15) / 16;

    if(s->avctx->pix_fmt == PIX_FMT_NONE){
        av_log(s->avctx, AV_LOG_ERROR, "decoding to PIX_FMT_NONE is not supported.\n");
        return -1;
    }

    /* Slice threading needs at least one MB row per thread. */
    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && av_image_check_size(s->width, s->height, 0, s->avctx))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    ff_dct_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* Strides carry a +1 guard column used by the error-resilience code. */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    /* Wrap widths for the luma (8x8) and chroma (16x16) prediction planes. */
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->codec_tag = ff_toupper4(s->avctx->codec_tag);

    s->stream_codec_tag = ff_toupper4(s->avctx->stream_codec_tag);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_index2xy, (s->mb_num+1)*sizeof(int), fail) //error resilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables; the +mb_stride+1 offsets skip the guard row. */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t), fail)
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int), fail);
        }
        FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);

        /* Allocate MB type table */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_type  , mb_array_size * sizeof(uint16_t), fail) //needed for encoding

        FF_ALLOCZ_OR_GOTO(s->avctx, s->lambda_table, mb_array_size * sizeof(int), fail)

        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix  , 64*32   * sizeof(int), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix  , 64*32   * sizeof(int), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*), fail)

        if(s->avctx->noise_reduction){
            FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset, 2 * 64 * sizeof(uint16_t), fail)
        }
    }
    FF_ALLOCZ_OR_GOTO(s->avctx, s->picture, MAX_PICTURE_COUNT * sizeof(Picture), fail)
    for(i = 0; i < MAX_PICTURE_COUNT; i++) {
        avcodec_get_frame_defaults((AVFrame *)&s->picture[i]);
    }

    FF_ALLOCZ_OR_GOTO(s->avctx, s->error_status_table, mb_array_size*sizeof(uint8_t), fail)

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        FF_ALLOCZ_OR_GOTO(s->avctx,    s->b_field_mv_table_base[i][j][k], mv_table_size * 2 * sizeof(int16_t), fail)
                        s->b_field_mv_table[i][j][k] = s->b_field_mv_table_base[i][j][k] + s->mb_stride + 1;
                    }
                    FF_ALLOCZ_OR_GOTO(s->avctx, s->b_field_select_table [i][j], mb_array_size * 2 * sizeof(uint8_t), fail)
                    FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_mv_table_base[i][j], mv_table_size * 2 * sizeof(int16_t), fail)
                    s->p_field_mv_table[i][j] = s->p_field_mv_table_base[i][j]+ s->mb_stride + 1;
                }
                FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_select_table[i], mb_array_size * 2 * sizeof(uint8_t), fail)
            }
    }
    if (s->out_format == FMT_H263) {
        /* cbp values */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->coded_block_base, y_size, fail);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->cbp_table     , mb_array_size * sizeof(uint8_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->pred_dir_table, mb_array_size * sizeof(uint8_t), fail)
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        FF_ALLOCZ_OR_GOTO(s->avctx, s->dc_val_base, yc_size * sizeof(int16_t), fail);
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;  // neutral DC predictor value
    }

    /* which mb is a intra block */
    FF_ALLOCZ_OR_GOTO(s->avctx, s->mbintra_table, mb_array_size, fail);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    FF_ALLOCZ_OR_GOTO(s->avctx, s->mbskip_table, mb_array_size+2, fail);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    FF_ALLOCZ_OR_GOTO(s->avctx, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE, fail);

    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* Thread 0 reuses this context; further threads get a byte copy that
     * then receives its own scratch buffers below. */
    s->thread_context[0]= s;
    threads = s->avctx->thread_count;

    for(i=1; i<threads; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    for(i=0; i<threads; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        /* Divide the MB rows evenly (with rounding) among the threads. */
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
684
/* Free everything allocated by MPV_common_init() (and by ff_alloc_picture
 * for the picture pool); safe to call on a partially initialized context,
 * which is how MPV_common_init()'s fail path uses it. */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* Thread 0's context is s itself, so free its buffers but not the
     * struct; contexts 1..n-1 were av_malloc'ed and are freed too. */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    /* The non-_base pointers aimed into the freed allocations. */
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    av_freep(&s->dc_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
767
/**
 * Build the derived run/level lookup tables of an RLTable:
 * max_level[run], max_run[level] and index_run[run], one set for the
 * "not last" (i < rl->last) and one for the "last" coefficients.
 * When static_store is given, the three tables for each half are laid
 * out consecutively inside one static buffer of size
 * 2*MAX_RUN + MAX_LEVEL + 3 (offsets 0, MAX_RUN+1, MAX_RUN+MAX_LEVEL+2);
 * otherwise they are av_malloc'ed.
 */
void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
{
    int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
    uint8_t index_run[MAX_RUN+1];
    int last, run, level, start, end, i;

    /* If table is static, we can quit if rl->max_level[0] is not NULL */
    if(static_store && rl->max_level[0])
        return;

    /* compute max_level[], max_run[] and index_run[] */
    for(last=0;last<2;last++) {
        if (last == 0) {
            start = 0;
            end = rl->last;
        } else {
            start = rl->last;
            end = rl->n;
        }

        /* index_run entries default to rl->n meaning "no entry for this run". */
        memset(max_level, 0, MAX_RUN + 1);
        memset(max_run, 0, MAX_LEVEL + 1);
        memset(index_run, rl->n, MAX_RUN + 1);
        for(i=start;i<end;i++) {
            run = rl->table_run[i];
            level = rl->table_level[i];
            if (index_run[run] == rl->n)  /* first occurrence of this run */
                index_run[run] = i;
            if (level > max_level[run])
                max_level[run] = level;
            if (run > max_run[level])
                max_run[level] = run;
        }
        if(static_store)
            rl->max_level[last] = static_store[last];
        else
            rl->max_level[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
        if(static_store)
            rl->max_run[last] = static_store[last] + MAX_RUN + 1;
        else
            rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
        memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
        if(static_store)
            rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
        else
            rl->index_run[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
    }
}
818
819 void init_vlc_rl(RLTable *rl)
820 {
821     int i, q;
822
823     for(q=0; q<32; q++){
824         int qmul= q*2;
825         int qadd= (q-1)|1;
826
827         if(q==0){
828             qmul=1;
829             qadd=0;
830         }
831         for(i=0; i<rl->vlc.table_size; i++){
832             int code= rl->vlc.table[i][0];
833             int len = rl->vlc.table[i][1];
834             int level, run;
835
836             if(len==0){ // illegal code
837                 run= 66;
838                 level= MAX_LEVEL;
839             }else if(len<0){ //more bits needed
840                 run= 0;
841                 level= code;
842             }else{
843                 if(code==rl->n){ //esc
844                     run= 66;
845                     level= 0;
846                 }else{
847                     run=   rl->table_run  [code] + 1;
848                     level= rl->table_level[code] * qmul + qadd;
849                     if(code >= rl->last) run+=192;
850                 }
851             }
852             rl->rl_vlc[q][i].len= len;
853             rl->rl_vlc[q][i].level= level;
854             rl->rl_vlc[q][i].run= run;
855         }
856     }
857 }
858
859 int ff_find_unused_picture(MpegEncContext *s, int shared){
860     int i;
861
862     if(shared){
863         for(i=0; i<MAX_PICTURE_COUNT; i++){
864             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
865         }
866     }else{
867         for(i=0; i<MAX_PICTURE_COUNT; i++){
868             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
869         }
870         for(i=0; i<MAX_PICTURE_COUNT; i++){
871             if(s->picture[i].data[0]==NULL) return i;
872         }
873     }
874
875     av_log(s->avctx, AV_LOG_FATAL, "Internal error, picture buffer overflow\n");
876     /* We could return -1, but the codec would crash trying to draw into a
877      * non-existing frame anyway. This is safer than waiting for a random crash.
878      * Also the return of this is never useful, an encoder must only allocate
879      * as much as allowed in the specification. This has no relationship to how
880      * much libavcodec could allocate (and MAX_PICTURE_COUNT is always large
881      * enough for such valid streams).
882      * Plus, a decoder has to check stream validity and remove frames if too
883      * many reference frames are around. Waiting for "OOM" is not correct at
884      * all. Similarly, missing reference frames have to be replaced by
885      * interpolated/MC frames, anything else is a bug in the codec ...
886      */
887     abort();
888     return -1;
889 }
890
891 static void update_noise_reduction(MpegEncContext *s){
892     int intra, i;
893
894     for(intra=0; intra<2; intra++){
895         if(s->dct_count[intra] > (1<<16)){
896             for(i=0; i<64; i++){
897                 s->dct_error_sum[intra][i] >>=1;
898             }
899             s->dct_count[intra] >>= 1;
900         }
901
902         for(i=0; i<64; i++){
903             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
904         }
905     }
906 }
907
908 /**
909  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
910  */
911 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
912 {
913     int i;
914     Picture *pic;
915     s->mb_skipped = 0;
916
917     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
918
919     /* mark&release old frames */
920     if (s->pict_type != FF_B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
921       if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
922           free_frame_buffer(s, s->last_picture_ptr);
923
924         /* release forgotten pictures */
925         /* if(mpeg124/h263) */
926         if(!s->encoding){
927             for(i=0; i<MAX_PICTURE_COUNT; i++){
928                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
929                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
930                     free_frame_buffer(s, &s->picture[i]);
931                 }
932             }
933         }
934       }
935     }
936
937     if(!s->encoding){
938         /* release non reference frames */
939         for(i=0; i<MAX_PICTURE_COUNT; i++){
940             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
941                 free_frame_buffer(s, &s->picture[i]);
942             }
943         }
944
945         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
946             pic= s->current_picture_ptr; //we already have a unused image (maybe it was set before reading the header)
947         else{
948             i= ff_find_unused_picture(s, 0);
949             pic= &s->picture[i];
950         }
951
952         pic->reference= 0;
953         if (!s->dropable){
954             if (s->codec_id == CODEC_ID_H264)
955                 pic->reference = s->picture_structure;
956             else if (s->pict_type != FF_B_TYPE)
957                 pic->reference = 3;
958         }
959
960         pic->coded_picture_number= s->coded_picture_number++;
961
962         if(ff_alloc_picture(s, pic, 0) < 0)
963             return -1;
964
965         s->current_picture_ptr= pic;
966         //FIXME use only the vars from current_pic
967         if(s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO) {
968             if(s->picture_structure == PICT_FRAME)
969                 s->current_picture_ptr->top_field_first= s->top_field_first;
970             else
971                 s->current_picture_ptr->top_field_first= (s->picture_structure == PICT_TOP_FIELD) == s->first_field;
972         } else
973             s->current_picture_ptr->top_field_first= s->top_field_first;
974         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
975     }
976
977     s->current_picture_ptr->pict_type= s->pict_type;
978 //    if(s->flags && CODEC_FLAG_QSCALE)
979   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
980     s->current_picture_ptr->key_frame= s->pict_type == FF_I_TYPE;
981
982     ff_copy_picture(&s->current_picture, s->current_picture_ptr);
983
984     if (s->pict_type != FF_B_TYPE) {
985         s->last_picture_ptr= s->next_picture_ptr;
986         if(!s->dropable)
987             s->next_picture_ptr= s->current_picture_ptr;
988     }
989 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
990         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
991         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
992         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
993         s->pict_type, s->dropable);*/
994
995     if(s->codec_id != CODEC_ID_H264){
996         if((s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && s->pict_type!=FF_I_TYPE){
997             av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
998             /* Allocate a dummy frame */
999             i= ff_find_unused_picture(s, 0);
1000             s->last_picture_ptr= &s->picture[i];
1001             if(ff_alloc_picture(s, s->last_picture_ptr, 0) < 0)
1002                 return -1;
1003         }
1004         if((s->next_picture_ptr==NULL || s->next_picture_ptr->data[0]==NULL) && s->pict_type==FF_B_TYPE){
1005             /* Allocate a dummy frame */
1006             i= ff_find_unused_picture(s, 0);
1007             s->next_picture_ptr= &s->picture[i];
1008             if(ff_alloc_picture(s, s->next_picture_ptr, 0) < 0)
1009                 return -1;
1010         }
1011     }
1012
1013     if(s->last_picture_ptr) ff_copy_picture(&s->last_picture, s->last_picture_ptr);
1014     if(s->next_picture_ptr) ff_copy_picture(&s->next_picture, s->next_picture_ptr);
1015
1016     assert(s->pict_type == FF_I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1017
1018     if(s->picture_structure!=PICT_FRAME && s->out_format != FMT_H264){
1019         int i;
1020         for(i=0; i<4; i++){
1021             if(s->picture_structure == PICT_BOTTOM_FIELD){
1022                  s->current_picture.data[i] += s->current_picture.linesize[i];
1023             }
1024             s->current_picture.linesize[i] *= 2;
1025             s->last_picture.linesize[i] *=2;
1026             s->next_picture.linesize[i] *=2;
1027         }
1028     }
1029
1030     s->hurry_up= s->avctx->hurry_up;
1031     s->error_recognition= avctx->error_recognition;
1032
1033     /* set dequantizer, we can't do it during init as it might change for mpeg4
1034        and we can't do it in the header decode as init is not called for mpeg4 there yet */
1035     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1036         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1037         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1038     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1039         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1040         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1041     }else{
1042         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1043         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1044     }
1045
1046     if(s->dct_error_sum){
1047         assert(s->avctx->noise_reduction && s->encoding);
1048
1049         update_noise_reduction(s);
1050     }
1051
1052     if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration)
1053         return ff_xvmc_field_start(s, avctx);
1054
1055     return 0;
1056 }
1057
/**
 * Generic function for encode/decode called after a frame has been
 * coded/decoded: draws prediction edges, updates per-frame statistics
 * and (when encoding) releases non-reference frames.
 */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
    //just to make sure that all data is rendered.
    if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){
        ff_xvmc_field_end(s);
    }else if(!s->avctx->hwaccel
       && !(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
       && s->unrestricted_mv
       && s->current_picture.reference
       && !s->intra_only
       && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            /* chroma planes use half the edge width and half the edge positions */
            s->dsp.draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c();

    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=FF_B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                free_frame_buffer(s, &s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1110
1111 /**
1112  * draws an line from (ex, ey) -> (sx, sy).
1113  * @param w width of the image
1114  * @param h height of the image
1115  * @param stride stride/linesize of the image
1116  * @param color color of the arrow
1117  */
1118 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1119     int x, y, fr, f;
1120
1121     sx= av_clip(sx, 0, w-1);
1122     sy= av_clip(sy, 0, h-1);
1123     ex= av_clip(ex, 0, w-1);
1124     ey= av_clip(ey, 0, h-1);
1125
1126     buf[sy*stride + sx]+= color;
1127
1128     if(FFABS(ex - sx) > FFABS(ey - sy)){
1129         if(sx > ex){
1130             FFSWAP(int, sx, ex);
1131             FFSWAP(int, sy, ey);
1132         }
1133         buf+= sx + sy*stride;
1134         ex-= sx;
1135         f= ((ey-sy)<<16)/ex;
1136         for(x= 0; x <= ex; x++){
1137             y = (x*f)>>16;
1138             fr= (x*f)&0xFFFF;
1139             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1140             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1141         }
1142     }else{
1143         if(sy > ey){
1144             FFSWAP(int, sx, ex);
1145             FFSWAP(int, sy, ey);
1146         }
1147         buf+= sx + sy*stride;
1148         ey-= sy;
1149         if(ey) f= ((ex-sx)<<16)/ey;
1150         else   f= 0;
1151         for(y= 0; y <= ey; y++){
1152             x = (y*f)>>16;
1153             fr= (y*f)&0xFFFF;
1154             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1155             buf[y*stride + x+1]+= (color*         fr )>>16;
1156         }
1157     }
1158 }
1159
1160 /**
1161  * draws an arrow from (ex, ey) -> (sx, sy).
1162  * @param w width of the image
1163  * @param h height of the image
1164  * @param stride stride/linesize of the image
1165  * @param color color of the arrow
1166  */
1167 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1168     int dx,dy;
1169
1170     sx= av_clip(sx, -100, w+100);
1171     sy= av_clip(sy, -100, h+100);
1172     ex= av_clip(ex, -100, w+100);
1173     ey= av_clip(ey, -100, h+100);
1174
1175     dx= ex - sx;
1176     dy= ey - sy;
1177
1178     if(dx*dx + dy*dy > 3*3){
1179         int rx=  dx + dy;
1180         int ry= -dx + dy;
1181         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1182
1183         //FIXME subpixel accuracy
1184         rx= ROUNDED_DIV(rx*3<<4, length);
1185         ry= ROUNDED_DIV(ry*3<<4, length);
1186
1187         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1188         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1189     }
1190     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1191 }
1192
1193 /**
1194  * prints debuging info for the given picture.
1195  */
1196 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1197
1198     if(s->avctx->hwaccel || !pict || !pict->mb_type) return;
1199
1200     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1201         int x,y;
1202
1203         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1204         switch (pict->pict_type) {
1205             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1206             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1207             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1208             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1209             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1210             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1211         }
1212         for(y=0; y<s->mb_height; y++){
1213             for(x=0; x<s->mb_width; x++){
1214                 if(s->avctx->debug&FF_DEBUG_SKIP){
1215                     int count= s->mbskip_table[x + y*s->mb_stride];
1216                     if(count>9) count=9;
1217                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1218                 }
1219                 if(s->avctx->debug&FF_DEBUG_QP){
1220                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1221                 }
1222                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1223                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1224                     //Type & MV direction
1225                     if(IS_PCM(mb_type))
1226                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1227                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1228                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1229                     else if(IS_INTRA4x4(mb_type))
1230                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1231                     else if(IS_INTRA16x16(mb_type))
1232                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1233                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1234                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1235                     else if(IS_DIRECT(mb_type))
1236                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1237                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1238                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1239                     else if(IS_GMC(mb_type))
1240                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1241                     else if(IS_SKIP(mb_type))
1242                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1243                     else if(!USES_LIST(mb_type, 1))
1244                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1245                     else if(!USES_LIST(mb_type, 0))
1246                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1247                     else{
1248                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1249                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1250                     }
1251
1252                     //segmentation
1253                     if(IS_8X8(mb_type))
1254                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1255                     else if(IS_16X8(mb_type))
1256                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1257                     else if(IS_8X16(mb_type))
1258                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1259                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1260                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1261                     else
1262                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1263
1264
1265                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1266                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1267                     else
1268                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1269                 }
1270 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1271             }
1272             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1273         }
1274     }
1275
1276     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1277         const int shift= 1 + s->quarter_sample;
1278         int mb_y;
1279         uint8_t *ptr;
1280         int i;
1281         int h_chroma_shift, v_chroma_shift, block_height;
1282         const int width = s->avctx->width;
1283         const int height= s->avctx->height;
1284         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1285         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1286         s->low_delay=0; //needed to see the vectors without trashing the buffers
1287
1288         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1289         for(i=0; i<3; i++){
1290             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1291             pict->data[i]= s->visualization_buffer[i];
1292         }
1293         pict->type= FF_BUFFER_TYPE_COPY;
1294         ptr= pict->data[0];
1295         block_height = 16>>v_chroma_shift;
1296
1297         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1298             int mb_x;
1299             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1300                 const int mb_index= mb_x + mb_y*s->mb_stride;
1301                 if((s->avctx->debug_mv) && pict->motion_val){
1302                   int type;
1303                   for(type=0; type<3; type++){
1304                     int direction = 0;
1305                     switch (type) {
1306                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1307                                 continue;
1308                               direction = 0;
1309                               break;
1310                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1311                                 continue;
1312                               direction = 0;
1313                               break;
1314                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1315                                 continue;
1316                               direction = 1;
1317                               break;
1318                     }
1319                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1320                         continue;
1321
1322                     if(IS_8X8(pict->mb_type[mb_index])){
1323                       int i;
1324                       for(i=0; i<4; i++){
1325                         int sx= mb_x*16 + 4 + 8*(i&1);
1326                         int sy= mb_y*16 + 4 + 8*(i>>1);
1327                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1328                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1329                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1330                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1331                       }
1332                     }else if(IS_16X8(pict->mb_type[mb_index])){
1333                       int i;
1334                       for(i=0; i<2; i++){
1335                         int sx=mb_x*16 + 8;
1336                         int sy=mb_y*16 + 4 + 8*i;
1337                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1338                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1339                         int my=(pict->motion_val[direction][xy][1]>>shift);
1340
1341                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1342                             my*=2;
1343
1344                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1345                       }
1346                     }else if(IS_8X16(pict->mb_type[mb_index])){
1347                       int i;
1348                       for(i=0; i<2; i++){
1349                         int sx=mb_x*16 + 4 + 8*i;
1350                         int sy=mb_y*16 + 8;
1351                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1352                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1353                         int my=(pict->motion_val[direction][xy][1]>>shift);
1354
1355                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1356                             my*=2;
1357
1358                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1359                       }
1360                     }else{
1361                       int sx= mb_x*16 + 8;
1362                       int sy= mb_y*16 + 8;
1363                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1364                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1365                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1366                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1367                     }
1368                   }
1369                 }
1370                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1371                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1372                     int y;
1373                     for(y=0; y<block_height; y++){
1374                         *(uint64_t*)(pict->data[1] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[1])= c;
1375                         *(uint64_t*)(pict->data[2] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[2])= c;
1376                     }
1377                 }
1378                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1379                     int mb_type= pict->mb_type[mb_index];
1380                     uint64_t u,v;
1381                     int y;
1382 #define COLOR(theta, r)\
1383 u= (int)(128 + r*cos(theta*3.141592/180));\
1384 v= (int)(128 + r*sin(theta*3.141592/180));
1385
1386
1387                     u=v=128;
1388                     if(IS_PCM(mb_type)){
1389                         COLOR(120,48)
1390                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1391                         COLOR(30,48)
1392                     }else if(IS_INTRA4x4(mb_type)){
1393                         COLOR(90,48)
1394                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1395 //                        COLOR(120,48)
1396                     }else if(IS_DIRECT(mb_type)){
1397                         COLOR(150,48)
1398                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1399                         COLOR(170,48)
1400                     }else if(IS_GMC(mb_type)){
1401                         COLOR(190,48)
1402                     }else if(IS_SKIP(mb_type)){
1403 //                        COLOR(180,48)
1404                     }else if(!USES_LIST(mb_type, 1)){
1405                         COLOR(240,48)
1406                     }else if(!USES_LIST(mb_type, 0)){
1407                         COLOR(0,48)
1408                     }else{
1409                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1410                         COLOR(300,48)
1411                     }
1412
1413                     u*= 0x0101010101010101ULL;
1414                     v*= 0x0101010101010101ULL;
1415                     for(y=0; y<block_height; y++){
1416                         *(uint64_t*)(pict->data[1] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[1])= u;
1417                         *(uint64_t*)(pict->data[2] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[2])= v;
1418                     }
1419
1420                     //segmentation
1421                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1422                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1423                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1424                     }
1425                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1426                         for(y=0; y<16; y++)
1427                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1428                     }
1429                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
1430                         int dm= 1 << (mv_sample_log2-2);
1431                         for(i=0; i<4; i++){
1432                             int sx= mb_x*16 + 8*(i&1);
1433                             int sy= mb_y*16 + 8*(i>>1);
1434                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1435                             //FIXME bidir
1436                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
1437                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
1438                                 for(y=0; y<8; y++)
1439                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
1440                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
1441                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
1442                         }
1443                     }
1444
1445                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1446                         // hmm
1447                     }
1448                 }
1449                 s->mbskip_table[mb_index]=0;
1450             }
1451         }
1452     }
1453 }
1454
/**
 * Half-pel motion compensation for one block of one plane at reduced
 * ("lowres") resolution.
 *
 * @param dest         destination block
 * @param src          source plane (not block!); the block offset is
 *                     computed from src_x/src_y and the motion vector
 * @param field_based  non-zero for field pictures (halves the effective
 *                     vertical edge position)
 * @param field_select when set, use the second field of src
 * @param w, h         block width and height
 * @param pix_op       chroma MC function table, indexed by min(lowres, 2)
 * @param motion_x, motion_y motion vector; low bits are the sub-pel
 *                     fraction at the current lowres shift
 * @return 1 if the edge emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int op_index= FFMIN(lowres, 2);
    const int s_mask= (2<<lowres)-1;
    int emu=0;
    int sx, sy;

    if(s->quarter_sample){
        motion_x/=2;
        motion_y/=2;
    }

    /* split the vector into integer position and sub-pel fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    /* unsigned compare also catches negative src_x/src_y */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    /* rescale the sub-pel fraction to the 1/8-pel range pix_op expects */
    sx= (sx << 2) >> lowres;
    sy= (sy << 2) >> lowres;
    if(field_select)
        src += s->linesize;
    pix_op[op_index](dest, src, stride, h, sx, sy);
    return emu;
}
1497
/**
 * Apply one MPEG motion vector to the three components (Y, Cb, Cr)
 * in lowres decoding mode.
 * @param field_based 1 for field-based prediction (halves the vertical range)
 * @param bottom_field 1 when writing the bottom field of a frame picture
 * @param field_select which field of the reference to predict from
 * @param ref_picture array[3] of pointers to the reference planes
 * @param pix_op H.264-style chroma MC functions (used for all planes in lowres)
 * @param h height of the predicted area in lowres luma pixels
 */
static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h, int mb_y)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int op_index= FFMIN(lowres, 2);
    const int block_s= 8>>lowres;
    const int s_mask= (2<<lowres)-1;
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel will not work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    /* compensate for the field offset within the lowres-scaled frame */
    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split the luma vector into integer position and sub-pel fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(   mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    /* derive the chroma vector; the rule depends on the codec family */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y =    mb_y*block_s               + (my >> lowres);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(   mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* (unsigned) compare catches negative positions as well; if the block
       reaches outside the picture, build padded copies in edge_emu_buffer */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                /* chroma shares the luma emu buffer, starting 18 lines in */
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale the sub-pel fractions to the range used by pix_op */
    sx= (sx << 2) >> lowres;
    sy= (sy << 2) >> lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        uvsx= (uvsx << 2) >> lowres;
        uvsy= (uvsy << 2) >> lowres;
        pix_op[op_index](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[op_index](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
1595
1596 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
1597                                      uint8_t *dest_cb, uint8_t *dest_cr,
1598                                      uint8_t **ref_picture,
1599                                      h264_chroma_mc_func *pix_op,
1600                                      int mx, int my){
1601     const int lowres= s->avctx->lowres;
1602     const int op_index= FFMIN(lowres, 2);
1603     const int block_s= 8>>lowres;
1604     const int s_mask= (2<<lowres)-1;
1605     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
1606     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
1607     int emu=0, src_x, src_y, offset, sx, sy;
1608     uint8_t *ptr;
1609
1610     if(s->quarter_sample){
1611         mx/=2;
1612         my/=2;
1613     }
1614
1615     /* In case of 8X8, we construct a single chroma motion vector
1616        with a special rounding */
1617     mx= ff_h263_round_chroma(mx);
1618     my= ff_h263_round_chroma(my);
1619
1620     sx= mx & s_mask;
1621     sy= my & s_mask;
1622     src_x = s->mb_x*block_s + (mx >> (lowres+1));
1623     src_y = s->mb_y*block_s + (my >> (lowres+1));
1624
1625     offset = src_y * s->uvlinesize + src_x;
1626     ptr = ref_picture[1] + offset;
1627     if(s->flags&CODEC_FLAG_EMU_EDGE){
1628         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
1629            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
1630             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
1631             ptr= s->edge_emu_buffer;
1632             emu=1;
1633         }
1634     }
1635     sx= (sx << 2) >> lowres;
1636     sy= (sy << 2) >> lowres;
1637     pix_op[op_index](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
1638
1639     ptr = ref_picture[2] + offset;
1640     if(emu){
1641         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
1642         ptr= s->edge_emu_buffer;
1643     }
1644     pix_op[op_index](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
1645 }
1646
1647 /**
1648  * motion compensation of a single macroblock
1649  * @param s context
1650  * @param dest_y luma destination pointer
1651  * @param dest_cb chroma cb/u destination pointer
1652  * @param dest_cr chroma cr/v destination pointer
1653  * @param dir direction (0->forward, 1->backward)
1654  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
1655  * @param pix_op halfpel motion compensation function (average or put normally)
1656  * the motion vectors are taken from s->mv and the MV type from s->mv_type
1657  */
1658 static inline void MPV_motion_lowres(MpegEncContext *s,
1659                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1660                               int dir, uint8_t **ref_picture,
1661                               h264_chroma_mc_func *pix_op)
1662 {
1663     int mx, my;
1664     int mb_x, mb_y, i;
1665     const int lowres= s->avctx->lowres;
1666     const int block_s= 8>>lowres;
1667
1668     mb_x = s->mb_x;
1669     mb_y = s->mb_y;
1670
1671     switch(s->mv_type) {
1672     case MV_TYPE_16X16:
1673         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1674                     0, 0, 0,
1675                     ref_picture, pix_op,
1676                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s, mb_y);
1677         break;
1678     case MV_TYPE_8X8:
1679         mx = 0;
1680         my = 0;
1681             for(i=0;i<4;i++) {
1682                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
1683                             ref_picture[0], 0, 0,
1684                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
1685                             s->width, s->height, s->linesize,
1686                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
1687                             block_s, block_s, pix_op,
1688                             s->mv[dir][i][0], s->mv[dir][i][1]);
1689
1690                 mx += s->mv[dir][i][0];
1691                 my += s->mv[dir][i][1];
1692             }
1693
1694         if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY))
1695             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
1696         break;
1697     case MV_TYPE_FIELD:
1698         if (s->picture_structure == PICT_FRAME) {
1699             /* top field */
1700             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1701                         1, 0, s->field_select[dir][0],
1702                         ref_picture, pix_op,
1703                         s->mv[dir][0][0], s->mv[dir][0][1], block_s, mb_y);
1704             /* bottom field */
1705             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1706                         1, 1, s->field_select[dir][1],
1707                         ref_picture, pix_op,
1708                         s->mv[dir][1][0], s->mv[dir][1][1], block_s, mb_y);
1709         } else {
1710             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != FF_B_TYPE && !s->first_field){
1711                 ref_picture= s->current_picture_ptr->data;
1712             }
1713
1714             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1715                         0, 0, s->field_select[dir][0],
1716                         ref_picture, pix_op,
1717                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s, mb_y>>1);
1718         }
1719         break;
1720     case MV_TYPE_16X8:
1721         for(i=0; i<2; i++){
1722             uint8_t ** ref2picture;
1723
1724             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == FF_B_TYPE || s->first_field){
1725                 ref2picture= ref_picture;
1726             }else{
1727                 ref2picture= s->current_picture_ptr->data;
1728             }
1729
1730             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1731                         0, 0, s->field_select[dir][i],
1732                         ref2picture, pix_op,
1733                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s, mb_y>>1);
1734
1735             dest_y += 2*block_s*s->linesize;
1736             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
1737             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
1738         }
1739         break;
1740     case MV_TYPE_DMV:
1741         if(s->picture_structure == PICT_FRAME){
1742             for(i=0; i<2; i++){
1743                 int j;
1744                 for(j=0; j<2; j++){
1745                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1746                                 1, j, j^i,
1747                                 ref_picture, pix_op,
1748                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s, mb_y);
1749                 }
1750                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
1751             }
1752         }else{
1753             for(i=0; i<2; i++){
1754                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
1755                             0, 0, s->picture_structure != i+1,
1756                             ref_picture, pix_op,
1757                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s, mb_y>>1);
1758
1759                 // after put we make avg of the same block
1760                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
1761
1762                 //opposite parity is always in the same frame if this is second field
1763                 if(!s->first_field){
1764                     ref_picture = s->current_picture_ptr->data;
1765                 }
1766             }
1767         }
1768     break;
1769     default: assert(0);
1770     }
1771 }
1772
/* dequantize the intra block[] and overwrite dest[] with its idct result */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
1780
1781 /* add block[] to dest[] */
1782 static inline void add_dct(MpegEncContext *s,
1783                            DCTELEM *block, int i, uint8_t *dest, int line_size)
1784 {
1785     if (s->block_last_index[i] >= 0) {
1786         s->dsp.idct_add (dest, line_size, block);
1787     }
1788 }
1789
1790 static inline void add_dequant_dct(MpegEncContext *s,
1791                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1792 {
1793     if (s->block_last_index[i] >= 0) {
1794         s->dct_unquantize_inter(s, block, i, qscale);
1795
1796         s->dsp.idct_add (dest, line_size, block);
1797     }
1798 }
1799
1800 /**
1801  * cleans dc, ac, coded_block for the current non intra MB
1802  */
1803 void ff_clean_intra_table_entries(MpegEncContext *s)
1804 {
1805     int wrap = s->b8_stride;
1806     int xy = s->block_index[0];
1807
1808     s->dc_val[0][xy           ] =
1809     s->dc_val[0][xy + 1       ] =
1810     s->dc_val[0][xy     + wrap] =
1811     s->dc_val[0][xy + 1 + wrap] = 1024;
1812     /* ac pred */
1813     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
1814     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
1815     if (s->msmpeg4_version>=3) {
1816         s->coded_block[xy           ] =
1817         s->coded_block[xy + 1       ] =
1818         s->coded_block[xy     + wrap] =
1819         s->coded_block[xy + 1 + wrap] = 0;
1820     }
1821     /* chroma */
1822     wrap = s->mb_stride;
1823     xy = s->mb_x + s->mb_y * wrap;
1824     s->dc_val[1][xy] =
1825     s->dc_val[2][xy] = 1024;
1826     /* ac pred */
1827     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
1828     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
1829
1830     s->mbintra_table[xy]= 0;
1831 }
1832
/* generic function called after a macroblock has been parsed by the
   decoder or after it has been encoded by the encoder.

   It reconstructs the macroblock: motion compensation (for non-intra MBs)
   followed by dequantization/idct of the residual, honoring skip/hurry-up
   shortcuts and lowres decoding.

   Important variables used:
   s->mb_intra : true if intra macroblock
   s->mv_dir   : motion vector direction
   s->mv_type  : motion vector type
   s->mv       : motion vector
   s->interlaced_dct : true if interlaced dct used (mpeg2)
 */
static av_always_inline
void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
                            int lowres_flag, int is_mpeg12)
{
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
    /* XvMC does its own reconstruction in hardware */
    if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){
        ff_xvmc_decode_mb(s);//xvmc uses pblocks
        return;
    }

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (!is_mpeg12 && (s->h263_pred || s->h263_aic)) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (!is_mpeg12 && (s->h263_pred || s->h263_aic))
        s->mbintra_table[mb_xy]=1;

    /* reconstruction is skipped during encoding except when the pixels are
       actually needed (PSNR computation, reference frames, RD decisions) */
    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==FF_B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != FF_B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=FF_I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* B frames may not be written into the picture before it is fully
               decoded, so reconstruct into a scratchpad and copy at the end */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    op_qpix= s->me.qpel_put;
                    if ((!s->no_rounding) || s->pict_type==FF_B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->me.qpel_avg;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == FF_B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != FF_I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if (s->chroma_y_shift){
                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_linesize >>= 1;
                        dct_offset >>=1;
                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            } else if(is_mpeg12 || (s->codec_id != CODEC_ID_WMV2)){
                /* blocks were already dequantized during parsing */
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else if (CONFIG_WMV2_DECODER || CONFIG_WMV2_ENCODER) {
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_offset >>=1;
                        dct_linesize >>=1;
                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            }else{
                /* mpeg1/2 intra blocks are already dequantized */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        /* copy the scratchpad reconstruction into the real picture */
        if(!readable){
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
2079
2080 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
2081 #if !CONFIG_SMALL
2082     if(s->out_format == FMT_MPEG1) {
2083         if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 1);
2084         else                 MPV_decode_mb_internal(s, block, 0, 1);
2085     } else
2086 #endif
2087     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 0);
2088     else                  MPV_decode_mb_internal(s, block, 0, 0);
2089 }
2090
2091 /**
2092  *
2093  * @param h is the normal height, this will be reduced automatically if needed for the last row
2094  */
2095 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2096     if (s->avctx->draw_horiz_band) {
2097         AVFrame *src;
2098         const int field_pic= s->picture_structure != PICT_FRAME;
2099         int offset[4];
2100
2101         h= FFMIN(h, (s->avctx->height>>field_pic) - y);
2102
2103         if(field_pic && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)){
2104             h <<= 1;
2105             y <<= 1;
2106             if(s->first_field) return;
2107         }
2108
2109         if(s->pict_type==FF_B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
2110             src= (AVFrame*)s->current_picture_ptr;
2111         else if(s->last_picture_ptr)
2112             src= (AVFrame*)s->last_picture_ptr;
2113         else
2114             return;
2115
2116         if(s->pict_type==FF_B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2117             offset[0]=
2118             offset[1]=
2119             offset[2]=
2120             offset[3]= 0;
2121         }else{
2122             offset[0]= y * s->linesize;
2123             offset[1]=
2124             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
2125             offset[3]= 0;
2126         }
2127
2128         emms_c();
2129
2130         s->avctx->draw_horiz_band(s->avctx, src, offset,
2131                                   y, s->picture_structure, h);
2132     }
2133 }
2134
2135 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
2136     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2137     const int uvlinesize= s->current_picture.linesize[1];
2138     const int mb_size= 4 - s->avctx->lowres;
2139
2140     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
2141     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
2142     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
2143     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
2144     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2145     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2146     //block_index is not used by mpeg2, so it is not affected by chroma_format
2147
2148     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
2149     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2150     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2151
2152     if(!(s->pict_type==FF_B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
2153     {
2154         if(s->picture_structure==PICT_FRAME){
2155         s->dest[0] += s->mb_y *   linesize << mb_size;
2156         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2157         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2158         }else{
2159             s->dest[0] += (s->mb_y>>1) *   linesize << mb_size;
2160             s->dest[1] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
2161             s->dest[2] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
2162             assert((s->mb_y&1) == (s->picture_structure == PICT_BOTTOM_FIELD));
2163         }
2164     }
2165 }
2166
2167 void ff_mpeg_flush(AVCodecContext *avctx){
2168     int i;
2169     MpegEncContext *s = avctx->priv_data;
2170
2171     if(s==NULL || s->picture==NULL)
2172         return;
2173
2174     for(i=0; i<MAX_PICTURE_COUNT; i++){
2175        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
2176                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
2177         free_frame_buffer(s, &s->picture[i]);
2178     }
2179     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
2180
2181     s->mb_x= s->mb_y= 0;
2182     s->closed_gop= 0;
2183
2184     s->parse_context.state= -1;
2185     s->parse_context.frame_start_found= 0;
2186     s->parse_context.overread= 0;
2187     s->parse_context.overread_index= 0;
2188     s->parse_context.index= 0;
2189     s->parse_context.last_index= 0;
2190     s->bitstream_buffer_size=0;
2191     s->pp_time=0;
2192 }
2193
2194 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
2195                                    DCTELEM *block, int n, int qscale)
2196 {
2197     int i, level, nCoeffs;
2198     const uint16_t *quant_matrix;
2199
2200     nCoeffs= s->block_last_index[n];
2201
2202     if (n < 4)
2203         block[0] = block[0] * s->y_dc_scale;
2204     else
2205         block[0] = block[0] * s->c_dc_scale;
2206     /* XXX: only mpeg1 */
2207     quant_matrix = s->intra_matrix;
2208     for(i=1;i<=nCoeffs;i++) {
2209         int j= s->intra_scantable.permutated[i];
2210         level = block[j];
2211         if (level) {
2212             if (level < 0) {
2213                 level = -level;
2214                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2215                 level = (level - 1) | 1;
2216                 level = -level;
2217             } else {
2218                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2219                 level = (level - 1) | 1;
2220             }
2221             block[j] = level;
2222         }
2223     }
2224 }
2225
2226 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
2227                                    DCTELEM *block, int n, int qscale)
2228 {
2229     int i, level, nCoeffs;
2230     const uint16_t *quant_matrix;
2231
2232     nCoeffs= s->block_last_index[n];
2233
2234     quant_matrix = s->inter_matrix;
2235     for(i=0; i<=nCoeffs; i++) {
2236         int j= s->intra_scantable.permutated[i];
2237         level = block[j];
2238         if (level) {
2239             if (level < 0) {
2240                 level = -level;
2241                 level = (((level << 1) + 1) * qscale *
2242                          ((int) (quant_matrix[j]))) >> 4;
2243                 level = (level - 1) | 1;
2244                 level = -level;
2245             } else {
2246                 level = (((level << 1) + 1) * qscale *
2247                          ((int) (quant_matrix[j]))) >> 4;
2248                 level = (level - 1) | 1;
2249             }
2250             block[j] = level;
2251         }
2252     }
2253 }
2254
2255 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
2256                                    DCTELEM *block, int n, int qscale)
2257 {
2258     int i, level, nCoeffs;
2259     const uint16_t *quant_matrix;
2260
2261     if(s->alternate_scan) nCoeffs= 63;
2262     else nCoeffs= s->block_last_index[n];
2263
2264     if (n < 4)
2265         block[0] = block[0] * s->y_dc_scale;
2266     else
2267         block[0] = block[0] * s->c_dc_scale;
2268     quant_matrix = s->intra_matrix;
2269     for(i=1;i<=nCoeffs;i++) {
2270         int j= s->intra_scantable.permutated[i];
2271         level = block[j];
2272         if (level) {
2273             if (level < 0) {
2274                 level = -level;
2275                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2276                 level = -level;
2277             } else {
2278                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2279             }
2280             block[j] = level;
2281         }
2282     }
2283 }
2284
2285 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
2286                                    DCTELEM *block, int n, int qscale)
2287 {
2288     int i, level, nCoeffs;
2289     const uint16_t *quant_matrix;
2290     int sum=-1;
2291
2292     if(s->alternate_scan) nCoeffs= 63;
2293     else nCoeffs= s->block_last_index[n];
2294
2295     if (n < 4)
2296         block[0] = block[0] * s->y_dc_scale;
2297     else
2298         block[0] = block[0] * s->c_dc_scale;
2299     quant_matrix = s->intra_matrix;
2300     for(i=1;i<=nCoeffs;i++) {
2301         int j= s->intra_scantable.permutated[i];
2302         level = block[j];
2303         if (level) {
2304             if (level < 0) {
2305                 level = -level;
2306                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2307                 level = -level;
2308             } else {
2309                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2310             }
2311             block[j] = level;
2312             sum+=level;
2313         }
2314     }
2315     block[63]^=sum&1;
2316 }
2317
2318 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
2319                                    DCTELEM *block, int n, int qscale)
2320 {
2321     int i, level, nCoeffs;
2322     const uint16_t *quant_matrix;
2323     int sum=-1;
2324
2325     if(s->alternate_scan) nCoeffs= 63;
2326     else nCoeffs= s->block_last_index[n];
2327
2328     quant_matrix = s->inter_matrix;
2329     for(i=0; i<=nCoeffs; i++) {
2330         int j= s->intra_scantable.permutated[i];
2331         level = block[j];
2332         if (level) {
2333             if (level < 0) {
2334                 level = -level;
2335                 level = (((level << 1) + 1) * qscale *
2336                          ((int) (quant_matrix[j]))) >> 4;
2337                 level = -level;
2338             } else {
2339                 level = (((level << 1) + 1) * qscale *
2340                          ((int) (quant_matrix[j]))) >> 4;
2341             }
2342             block[j] = level;
2343             sum+=level;
2344         }
2345     }
2346     block[63]^=sum&1;
2347 }
2348
2349 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
2350                                   DCTELEM *block, int n, int qscale)
2351 {
2352     int i, level, qmul, qadd;
2353     int nCoeffs;
2354
2355     assert(s->block_last_index[n]>=0);
2356
2357     qmul = qscale << 1;
2358
2359     if (!s->h263_aic) {
2360         if (n < 4)
2361             block[0] = block[0] * s->y_dc_scale;
2362         else
2363             block[0] = block[0] * s->c_dc_scale;
2364         qadd = (qscale - 1) | 1;
2365     }else{
2366         qadd = 0;
2367     }
2368     if(s->ac_pred)
2369         nCoeffs=63;
2370     else
2371         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2372
2373     for(i=1; i<=nCoeffs; i++) {
2374         level = block[i];
2375         if (level) {
2376             if (level < 0) {
2377                 level = level * qmul - qadd;
2378             } else {
2379                 level = level * qmul + qadd;
2380             }
2381             block[i] = level;
2382         }
2383     }
2384 }
2385
2386 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
2387                                   DCTELEM *block, int n, int qscale)
2388 {
2389     int i, level, qmul, qadd;
2390     int nCoeffs;
2391
2392     assert(s->block_last_index[n]>=0);
2393
2394     qadd = (qscale - 1) | 1;
2395     qmul = qscale << 1;
2396
2397     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2398
2399     for(i=0; i<=nCoeffs; i++) {
2400         level = block[i];
2401         if (level) {
2402             if (level < 0) {
2403                 level = level * qmul - qadd;
2404             } else {
2405                 level = level * qmul + qadd;
2406             }
2407             block[i] = level;
2408         }
2409     }
2410 }
2411
2412 /**
2413  * set qscale and update qscale dependent variables.
2414  */
2415 void ff_set_qscale(MpegEncContext * s, int qscale)
2416 {
2417     if (qscale < 1)
2418         qscale = 1;
2419     else if (qscale > 31)
2420         qscale = 31;
2421
2422     s->qscale = qscale;
2423     s->chroma_qscale= s->chroma_qscale_table[qscale];
2424
2425     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
2426     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
2427 }