/* libavcodec/mpegvideo.c */
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavcore/imgutils.h"
32 #include "avcodec.h"
33 #include "dsputil.h"
34 #include "internal.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "mjpegenc.h"
38 #include "msmpeg4.h"
39 #include "faandct.h"
40 #include "xvmc_internal.h"
41 #include <limits.h>
42
43 //#undef NDEBUG
44 //#include <assert.h>
45
46 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
53                                    DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
55                                    DCTELEM *block, int n, int qscale);
56 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
57                                   DCTELEM *block, int n, int qscale);
58 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
59                                   DCTELEM *block, int n, int qscale);
60
61
62 /* enable all paranoid tests for rounding, overflows, etc... */
63 //#define PARANOID
64
65 //#define DEBUG
66
67
/* Identity mapping: chroma qscale equals luma qscale. Used as the default
 * when a codec does not install its own chroma qscale table. */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};

/* MPEG-1 intra DC scale: constant 8 regardless of qscale (index). */
const uint8_t ff_mpeg1_dc_scale_table[128]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
};

/* MPEG-2 intra DC scale, constant 4 (selected via ff_mpeg2_dc_scale_table
 * below; presumably indexed by intra_dc_precision — see users of that array). */
static const uint8_t mpeg2_dc_scale_table1[128]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};

/* MPEG-2 intra DC scale, constant 2. */
static const uint8_t mpeg2_dc_scale_table2[128]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};

/* MPEG-2 intra DC scale, constant 1 (no scaling). */
static const uint8_t mpeg2_dc_scale_table3[128]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};

/* DC scale table selector: index 0 gives the MPEG-1 scale (8), indices
 * 1..3 give the progressively finer MPEG-2 scales (4, 2, 1). */
const uint8_t * const ff_mpeg2_dc_scale_table[4]={
    ff_mpeg1_dc_scale_table,
    mpeg2_dc_scale_table1,
    mpeg2_dc_scale_table2,
    mpeg2_dc_scale_table3,
};

/* Pixel formats offered by plain software decoders: 4:2:0 only. */
const enum PixelFormat ff_pixfmt_list_420[] = {
    PIX_FMT_YUV420P,
    PIX_FMT_NONE
};

/* As above but with hardware-accelerated surface formats listed first. */
const enum PixelFormat ff_hwaccel_pixfmt_list_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_YUV420P,
    PIX_FMT_NONE
};
123
/**
 * Scan [p, end) for an MPEG-style start code (byte sequence 00 00 01 xx).
 * *state is a sliding window over the last bytes seen, so a start code that
 * straddles two consecutive calls is still detected.
 * @return pointer just past the xx byte of the found start code, or end if
 *         none was found; *state is updated so the search can resume there.
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* first check whether bytes carried over in *state combine with the
       next up-to-3 input bytes into a start code */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end) /* tmp==0x100 <=> the 3 bytes before *p were 00 00 01 */
            return p;
    }

    /* scan the rest of the buffer; strides of up to 3 are safe because a
       00 00 01 prefix cannot overlap a nonzero byte at p[-1] > 1 */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3; /* p[-1] can be neither the 00s nor the 01 */
        else if(p[-2]          ) p+= 2; /* p[-2] nonzero: cannot be part of 00 00 */
        else if(p[-3]|(p[-1]-1)) p++;   /* not exactly 00 00 01 yet */
        else{
            p++;    /* found 00 00 01 at p[-3..-1] */
            break;
        }
    }

    /* reload *state with the last 4 bytes before the return position so a
       subsequent call can continue seamlessly */
    p= FFMIN(p, end)-4;
    *state= AV_RB32(p);

    return p+4;
}
153
/* init common dct for both encoder and decoder:
 * installs the C dct_unquantize function pointers (optionally replaced by
 * arch-specific versions below) and initializes the scantables permuted
 * for the selected IDCT. */
av_cold int ff_dct_common_init(MpegEncContext *s)
{
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    /* bitexact mode needs the reference (non-mismatch-optimized) variant */
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

    /* per-architecture overrides of the function pointers set above */
#if   HAVE_MMX
    MPV_common_init_mmx(s);
#elif ARCH_ALPHA
    MPV_common_init_axp(s);
#elif CONFIG_MLIB
    MPV_common_init_mlib(s);
#elif HAVE_MMI
    MPV_common_init_mmi(s);
#elif ARCH_ARM
    MPV_common_init_arm(s);
#elif HAVE_ALTIVEC
    MPV_common_init_altivec(s);
#elif ARCH_BFIN
    MPV_common_init_bfin(s);
#endif

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
197
/**
 * Shallow-copy all fields of src into dst, then mark dst as
 * FF_BUFFER_TYPE_COPY so it is not treated as the buffer's owner.
 * Note the type is overwritten AFTER the struct copy — the order matters.
 */
void ff_copy_picture(Picture *dst, Picture *src){
    *dst = *src;
    dst->type= FF_BUFFER_TYPE_COPY;
}
202
/**
 * Release a frame buffer
 * Returns the pixel buffer to the application via release_buffer() and
 * frees any hwaccel private data attached to the picture.
 */
static void free_frame_buffer(MpegEncContext *s, Picture *pic)
{
    s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    av_freep(&pic->hwaccel_picture_private);
}
211
/**
 * Allocate a frame buffer
 * Acquires pixel memory for pic through the AVCodecContext get_buffer()
 * callback (allocating hwaccel private data first, if the hwaccel needs
 * any) and validates what the callback returned.
 * @return 0 on success, -1 on failure (hwaccel private data freed again).
 */
static int alloc_frame_buffer(MpegEncContext *s, Picture *pic)
{
    int r;

    if (s->avctx->hwaccel) {
        assert(!pic->hwaccel_picture_private);
        if (s->avctx->hwaccel->priv_data_size) {
            pic->hwaccel_picture_private = av_mallocz(s->avctx->hwaccel->priv_data_size);
            if (!pic->hwaccel_picture_private) {
                av_log(s->avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n");
                return -1;
            }
        }
    }

    r = s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

    /* the callback must deliver a buffer with valid age/type metadata */
    if (r<0 || !pic->age || !pic->type || !pic->data[0]) {
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
        av_freep(&pic->hwaccel_picture_private);
        return -1;
    }

    /* strides must not change once the context has committed to one */
    if (s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])) {
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
        free_frame_buffer(s, pic);
        return -1;
    }

    /* both chroma planes are assumed to share one stride everywhere else */
    if (pic->linesize[1] != pic->linesize[2]) {
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
        free_frame_buffer(s, pic);
        return -1;
    }

    return 0;
}
252
253 /**
254  * allocates a Picture
255  * The pixels are allocated/set by calling get_buffer() if shared=0
256  */
257 int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){
258     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) does not sig11
259     const int mb_array_size= s->mb_stride*s->mb_height;
260     const int b8_array_size= s->b8_stride*s->mb_height*2;
261     const int b4_array_size= s->b4_stride*s->mb_height*4;
262     int i;
263     int r= -1;
264
265     if(shared){
266         assert(pic->data[0]);
267         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
268         pic->type= FF_BUFFER_TYPE_SHARED;
269     }else{
270         assert(!pic->data[0]);
271
272         if (alloc_frame_buffer(s, pic) < 0)
273             return -1;
274
275         s->linesize  = pic->linesize[0];
276         s->uvlinesize= pic->linesize[1];
277     }
278
279     if(pic->qscale_table==NULL){
280         if (s->encoding) {
281             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_var   , mb_array_size * sizeof(int16_t)  , fail)
282             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mc_mb_var, mb_array_size * sizeof(int16_t)  , fail)
283             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_mean  , mb_array_size * sizeof(int8_t )  , fail)
284         }
285
286         FF_ALLOCZ_OR_GOTO(s->avctx, pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2, fail) //the +2 is for the slice end check
287         FF_ALLOCZ_OR_GOTO(s->avctx, pic->qscale_table , mb_array_size * sizeof(uint8_t)  , fail)
288         FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_type_base , (big_mb_num + s->mb_stride) * sizeof(uint32_t), fail)
289         pic->mb_type= pic->mb_type_base + 2*s->mb_stride+1;
290         if(s->out_format == FMT_H264){
291             for(i=0; i<2; i++){
292                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t), fail)
293                 pic->motion_val[i]= pic->motion_val_base[i]+4;
294                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->ref_index[i], 4*mb_array_size * sizeof(uint8_t), fail)
295             }
296             pic->motion_subsample_log2= 2;
297         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
298             for(i=0; i<2; i++){
299                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t), fail)
300                 pic->motion_val[i]= pic->motion_val_base[i]+4;
301                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->ref_index[i], 4*mb_array_size * sizeof(uint8_t), fail)
302             }
303             pic->motion_subsample_log2= 3;
304         }
305         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
306             FF_ALLOCZ_OR_GOTO(s->avctx, pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6, fail)
307         }
308         pic->qstride= s->mb_stride;
309         FF_ALLOCZ_OR_GOTO(s->avctx, pic->pan_scan , 1 * sizeof(AVPanScan), fail)
310     }
311
312     /* It might be nicer if the application would keep track of these
313      * but it would require an API change. */
314     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
315     s->prev_pict_types[0]= s->dropable ? FF_B_TYPE : s->pict_type;
316     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == FF_B_TYPE)
317         pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.
318
319     return 0;
320 fail: //for the FF_ALLOCZ_OR_GOTO macro
321     if(r>=0)
322         free_frame_buffer(s, pic);
323     return -1;
324 }
325
/**
 * deallocates a picture
 * Releases the underlying frame buffer (unless the picture only borrows a
 * shared buffer) and frees every side-data table ff_alloc_picture() set up.
 */
static void free_picture(MpegEncContext *s, Picture *pic){
    int i;

    /* only release buffers we own; shared ones belong to the caller */
    if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
        free_frame_buffer(s, pic);
    }

    av_freep(&pic->mb_var);
    av_freep(&pic->mc_mb_var);
    av_freep(&pic->mb_mean);
    av_freep(&pic->mbskip_table);
    av_freep(&pic->qscale_table);
    av_freep(&pic->mb_type_base);
    av_freep(&pic->dct_coeff);
    av_freep(&pic->pan_scan);
    pic->mb_type= NULL; /* pointed into mb_type_base, now dangling */
    for(i=0; i<2; i++){
        av_freep(&pic->motion_val_base[i]);
        av_freep(&pic->ref_index[i]);
    }

    /* a shared picture keeps its planes alive elsewhere; just detach */
    if(pic->type == FF_BUFFER_TYPE_SHARED){
        for(i=0; i<4; i++){
            pic->base[i]=
            pic->data[i]= NULL;
        }
        pic->type= 0;
    }
}
358
/**
 * Allocate the scratch buffers each slice-thread context needs privately:
 * edge emulation buffer, motion-estimation scratchpads, DCT block storage
 * and (for H.263-family formats) the AC prediction values.
 * @return 0 on success, -1 on allocation failure (cleanup is deferred to
 *         MPV_common_end(), see the fail label).
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int y_size = s->b8_stride * (2 * s->mb_height + 1);
    int c_size = s->mb_stride * (s->mb_height + 1);
    int yc_size = y_size + 2 * c_size;
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    FF_ALLOCZ_OR_GOTO(s->avctx, s->allocated_edge_emu_buffer, (s->width+64)*2*21*2, fail); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;

     //FIXME should be linesize instead of s->width*2 but that is not known before get_buffer()
    FF_ALLOCZ_OR_GOTO(s->avctx, s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t), fail)
    /* the remaining scratchpads deliberately alias the same storage */
    s->me.temp=         s->me.scratchpad;
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        FF_ALLOCZ_OR_GOTO(s->avctx, s->me.map      , ME_MAP_SIZE*sizeof(uint32_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t), fail)
        if(s->avctx->noise_reduction){
            FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_error_sum, 2 * 64 * sizeof(int), fail)
        }
    }
    /* 12 blocks of 64 coefficients, x2 — s->block points at the first set */
    FF_ALLOCZ_OR_GOTO(s->avctx, s->blocks, 64*12*2 * sizeof(DCTELEM), fail)
    s->block= s->blocks[0];

    for(i=0;i<12;i++){
        s->pblocks[i] = &s->block[i];
    }

    if (s->out_format == FMT_H263) {
        /* ac values */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_val_base, yc_size * sizeof(int16_t) * 16, fail);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;
    }

    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
401
402 static void free_duplicate_context(MpegEncContext *s){
403     if(s==NULL) return;
404
405     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
406     av_freep(&s->me.scratchpad);
407     s->me.temp=
408     s->rd_scratchpad=
409     s->b_scratchpad=
410     s->obmc_scratchpad= NULL;
411
412     av_freep(&s->dct_error_sum);
413     av_freep(&s->me.map);
414     av_freep(&s->me.score_map);
415     av_freep(&s->blocks);
416     av_freep(&s->ac_val_base);
417     s->block= NULL;
418 }
419
/**
 * Copy the thread-local fields of src (scratch buffers, block storage,
 * slice range, bitstream writer, ME state, AC prediction pointers) into
 * bak, so a wholesale memcpy of the whole context can be undone for these
 * fields afterwards. Used only by ff_update_duplicate_context() below.
 */
static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
#define COPY(a) bak->a= src->a
    COPY(allocated_edge_emu_buffer);
    COPY(edge_emu_buffer);
    COPY(me.scratchpad);
    COPY(me.temp);
    COPY(rd_scratchpad);
    COPY(b_scratchpad);
    COPY(obmc_scratchpad);
    COPY(me.map);
    COPY(me.score_map);
    COPY(blocks);
    COPY(block);
    COPY(start_mb_y);
    COPY(end_mb_y);
    COPY(me.map_generation);
    COPY(pb);
    COPY(dct_error_sum);
    COPY(dct_count[0]);
    COPY(dct_count[1]);
    COPY(ac_val_base);
    COPY(ac_val[0]);
    COPY(ac_val[1]);
    COPY(ac_val[2]);
#undef COPY
}
446
/**
 * Overwrite dst with src while preserving dst's thread-local fields:
 * save them, memcpy the whole context, restore them, and re-point the
 * pblocks pointers at dst's own block array (they would otherwise point
 * into src after the memcpy).
 */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    for(i=0;i<12;i++){
        dst->pblocks[i] = &dst->block[i];
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
460
461 /**
462  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
463  * the changed fields will not depend upon the prior state of the MpegEncContext.
464  */
465 void MPV_common_defaults(MpegEncContext *s){
466     s->y_dc_scale_table=
467     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
468     s->chroma_qscale_table= ff_default_chroma_qscale_table;
469     s->progressive_frame= 1;
470     s->progressive_sequence= 1;
471     s->picture_structure= PICT_FRAME;
472
473     s->coded_picture_number = 0;
474     s->picture_number = 0;
475     s->input_picture_number = 0;
476
477     s->picture_in_gop_number = 0;
478
479     s->f_code = 1;
480     s->b_code = 1;
481 }
482
/**
 * sets the given MpegEncContext to defaults for decoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 * Currently identical to the common defaults; kept separate so decoder-only
 * defaults can be added without touching the encoder path.
 */
void MPV_decode_defaults(MpegEncContext *s){
    MPV_common_defaults(s);
}
490
/**
 * init common structure for both encoder and decoder.
 * this assumes that some variables like width/height are already set
 * Allocates all tables shared between threads, then sets up one duplicate
 * context per slice thread. On any failure everything is torn down through
 * MPV_common_end() and -1 is returned.
 */
av_cold int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y, threads;

    /* interlaced MPEG-2 rounds the height to a multiple of 32 (field MB
       pairs); H.264 manages mb_height itself */
    if(s->codec_id == CODEC_ID_MPEG2VIDEO && !s->progressive_sequence)
        s->mb_height = (s->height + 31) / 32 * 2;
    else if (s->codec_id != CODEC_ID_H264)
        s->mb_height = (s->height + 15) / 16;

    if(s->avctx->pix_fmt == PIX_FMT_NONE){
        av_log(s->avctx, AV_LOG_ERROR, "decoding to PIX_FMT_NONE is not supported.\n");
        return -1;
    }

    /* slice threading cannot use more threads than macroblock rows */
    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && av_image_check_size(s->width, s->height, 0, s->avctx))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    ff_dct_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* strides carry one extra column so edge MBs have addressable neighbors */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->codec_tag = ff_toupper4(s->avctx->codec_tag);

    s->stream_codec_tag = ff_toupper4(s->avctx->stream_codec_tag);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    /* raster MB index -> stride-based MB index lookup table */
    FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_index2xy, (s->mb_num+1)*sizeof(int), fail) //error ressilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables; the usable pointers are offset by one MB row
           plus one column so index -1 neighbors are valid */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t), fail)
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int), fail);
        }
        FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);

        /* Allocate MB type table */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_type  , mb_array_size * sizeof(uint16_t), fail) //needed for encoding

        FF_ALLOCZ_OR_GOTO(s->avctx, s->lambda_table, mb_array_size * sizeof(int), fail)

        /* precomputed quantization matrices, one entry per qscale (32) */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix  , 64*32   * sizeof(int), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix  , 64*32   * sizeof(int), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*), fail)

        if(s->avctx->noise_reduction){
            FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset, 2 * 64 * sizeof(uint16_t), fail)
        }
    }
    FF_ALLOCZ_OR_GOTO(s->avctx, s->picture, MAX_PICTURE_COUNT * sizeof(Picture), fail)
    for(i = 0; i < MAX_PICTURE_COUNT; i++) {
        avcodec_get_frame_defaults((AVFrame *)&s->picture[i]);
    }

    FF_ALLOCZ_OR_GOTO(s->avctx, s->error_status_table, mb_array_size*sizeof(uint8_t), fail)

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        FF_ALLOCZ_OR_GOTO(s->avctx,    s->b_field_mv_table_base[i][j][k], mv_table_size * 2 * sizeof(int16_t), fail)
                        s->b_field_mv_table[i][j][k] = s->b_field_mv_table_base[i][j][k] + s->mb_stride + 1;
                    }
                    FF_ALLOCZ_OR_GOTO(s->avctx, s->b_field_select_table [i][j], mb_array_size * 2 * sizeof(uint8_t), fail)
                    FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_mv_table_base[i][j], mv_table_size * 2 * sizeof(int16_t), fail)
                    s->p_field_mv_table[i][j] = s->p_field_mv_table_base[i][j]+ s->mb_stride + 1;
                }
                FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_select_table[i], mb_array_size * 2 * sizeof(uint8_t), fail)
            }
    }
    if (s->out_format == FMT_H263) {
        /* cbp values */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->coded_block_base, y_size, fail);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->cbp_table     , mb_array_size * sizeof(uint8_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->pred_dir_table, mb_array_size * sizeof(uint8_t), fail)
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        FF_ALLOCZ_OR_GOTO(s->avctx, s->dc_val_base, yc_size * sizeof(int16_t), fail);
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024; /* DC prediction reset value */
    }

    /* which mb is a intra block */
    FF_ALLOCZ_OR_GOTO(s->avctx, s->mbintra_table, mb_array_size, fail);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    FF_ALLOCZ_OR_GOTO(s->avctx, s->mbskip_table, mb_array_size+2, fail);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    FF_ALLOCZ_OR_GOTO(s->avctx, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE, fail);

    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread 0 reuses this context; the others get full private copies
       which then receive their own scratch buffers and slice ranges */
    s->thread_context[0]= s;
    threads = s->avctx->thread_count;

    for(i=1; i<threads; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    for(i=0; i<threads; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
684
/* Free everything MPV_common_init() (and the per-picture allocators)
 * set up; also used as the cleanup path when initialization fails, so
 * every free here must tolerate NULL/partially initialized state. */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* per-thread scratch buffers; context 0 is s itself, so only the
       copies (i>=1) are freed as whole structs */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* encoder MV tables; the non-base pointers were offsets into the base
       allocations and must be cleared, not freed */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    av_freep(&s->dc_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* pictures carry their own side-data tables; free those first */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
767
/**
 * Build the derived run/level lookup tables of an RLTable:
 * max_level[run], max_run[level] and index_run[run], separately for the
 * "not last" (0) and "last" (1) halves of the code table.
 * If static_store is non-NULL the three tables for each half are placed
 * consecutively inside it (sizes MAX_RUN+1, MAX_LEVEL+1, MAX_RUN+1);
 * otherwise they are av_malloc()ed.
 */
void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
{
    int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
    uint8_t index_run[MAX_RUN+1];
    int last, run, level, start, end, i;

    /* If table is static, we can quit if rl->max_level[0] is not NULL */
    if(static_store && rl->max_level[0])
        return;

    /* compute max_level[], max_run[] and index_run[] */
    for(last=0;last<2;last++) {
        /* entries [0, rl->last) are "not last", [rl->last, rl->n) are "last" */
        if (last == 0) {
            start = 0;
            end = rl->last;
        } else {
            start = rl->last;
            end = rl->n;
        }

        /* rl->n in index_run[] marks "no code with this run" */
        memset(max_level, 0, MAX_RUN + 1);
        memset(max_run, 0, MAX_LEVEL + 1);
        memset(index_run, rl->n, MAX_RUN + 1);
        for(i=start;i<end;i++) {
            run = rl->table_run[i];
            level = rl->table_level[i];
            if (index_run[run] == rl->n) /* first occurrence of this run */
                index_run[run] = i;
            if (level > max_level[run])
                max_level[run] = level;
            if (run > max_run[level])
                max_run[level] = run;
        }
        /* commit the locals into (static or heap) permanent storage */
        if(static_store)
            rl->max_level[last] = static_store[last];
        else
            rl->max_level[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
        if(static_store)
            rl->max_run[last] = static_store[last] + MAX_RUN + 1;
        else
            rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
        memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
        if(static_store)
            rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
        else
            rl->index_run[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
    }
}
818
819 void init_vlc_rl(RLTable *rl)
820 {
821     int i, q;
822
823     for(q=0; q<32; q++){
824         int qmul= q*2;
825         int qadd= (q-1)|1;
826
827         if(q==0){
828             qmul=1;
829             qadd=0;
830         }
831         for(i=0; i<rl->vlc.table_size; i++){
832             int code= rl->vlc.table[i][0];
833             int len = rl->vlc.table[i][1];
834             int level, run;
835
836             if(len==0){ // illegal code
837                 run= 66;
838                 level= MAX_LEVEL;
839             }else if(len<0){ //more bits needed
840                 run= 0;
841                 level= code;
842             }else{
843                 if(code==rl->n){ //esc
844                     run= 66;
845                     level= 0;
846                 }else{
847                     run=   rl->table_run  [code] + 1;
848                     level= rl->table_level[code] * qmul + qadd;
849                     if(code >= rl->last) run+=192;
850                 }
851             }
852             rl->rl_vlc[q][i].len= len;
853             rl->rl_vlc[q][i].level= level;
854             rl->rl_vlc[q][i].run= run;
855         }
856     }
857 }
858
859 int ff_find_unused_picture(MpegEncContext *s, int shared){
860     int i;
861
862     if(shared){
863         for(i=0; i<MAX_PICTURE_COUNT; i++){
864             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
865         }
866     }else{
867         for(i=0; i<MAX_PICTURE_COUNT; i++){
868             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
869         }
870         for(i=0; i<MAX_PICTURE_COUNT; i++){
871             if(s->picture[i].data[0]==NULL) return i;
872         }
873     }
874
875     av_log(s->avctx, AV_LOG_FATAL, "Internal error, picture buffer overflow\n");
876     /* We could return -1, but the codec would crash trying to draw into a
877      * non-existing frame anyway. This is safer than waiting for a random crash.
878      * Also the return of this is never useful, an encoder must only allocate
879      * as much as allowed in the specification. This has no relationship to how
880      * much libavcodec could allocate (and MAX_PICTURE_COUNT is always large
881      * enough for such valid streams).
882      * Plus, a decoder has to check stream validity and remove frames if too
883      * many reference frames are around. Waiting for "OOM" is not correct at
884      * all. Similarly, missing reference frames have to be replaced by
885      * interpolated/MC frames, anything else is a bug in the codec ...
886      */
887     abort();
888     return -1;
889 }
890
891 static void update_noise_reduction(MpegEncContext *s){
892     int intra, i;
893
894     for(intra=0; intra<2; intra++){
895         if(s->dct_count[intra] > (1<<16)){
896             for(i=0; i<64; i++){
897                 s->dct_error_sum[intra][i] >>=1;
898             }
899             s->dct_count[intra] >>= 1;
900         }
901
902         for(i=0; i<64; i++){
903             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
904         }
905     }
906 }
907
908 /**
909  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
910  */
911 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
912 {
913     int i;
914     Picture *pic;
915     s->mb_skipped = 0;
916
917     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
918
919     /* mark&release old frames */
920     if (s->pict_type != FF_B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
921       if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
922           free_frame_buffer(s, s->last_picture_ptr);
923
924         /* release forgotten pictures */
925         /* if(mpeg124/h263) */
926         if(!s->encoding){
927             for(i=0; i<MAX_PICTURE_COUNT; i++){
928                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
929                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
930                     free_frame_buffer(s, &s->picture[i]);
931                 }
932             }
933         }
934       }
935     }
936
937     if(!s->encoding){
938         /* release non reference frames */
939         for(i=0; i<MAX_PICTURE_COUNT; i++){
940             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
941                 free_frame_buffer(s, &s->picture[i]);
942             }
943         }
944
945         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
946             pic= s->current_picture_ptr; //we already have a unused image (maybe it was set before reading the header)
947         else{
948             i= ff_find_unused_picture(s, 0);
949             pic= &s->picture[i];
950         }
951
952         pic->reference= 0;
953         if (!s->dropable){
954             if (s->codec_id == CODEC_ID_H264)
955                 pic->reference = s->picture_structure;
956             else if (s->pict_type != FF_B_TYPE)
957                 pic->reference = 3;
958         }
959
960         pic->coded_picture_number= s->coded_picture_number++;
961
962         if(ff_alloc_picture(s, pic, 0) < 0)
963             return -1;
964
965         s->current_picture_ptr= pic;
966         //FIXME use only the vars from current_pic
967         s->current_picture_ptr->top_field_first= s->top_field_first;
968         if(s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO) {
969             if(s->picture_structure != PICT_FRAME)
970                 s->current_picture_ptr->top_field_first= (s->picture_structure == PICT_TOP_FIELD) == s->first_field;
971         }
972         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
973     }
974
975     s->current_picture_ptr->pict_type= s->pict_type;
976 //    if(s->flags && CODEC_FLAG_QSCALE)
977   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
978     s->current_picture_ptr->key_frame= s->pict_type == FF_I_TYPE;
979
980     ff_copy_picture(&s->current_picture, s->current_picture_ptr);
981
982     if (s->pict_type != FF_B_TYPE) {
983         s->last_picture_ptr= s->next_picture_ptr;
984         if(!s->dropable)
985             s->next_picture_ptr= s->current_picture_ptr;
986     }
987 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
988         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
989         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
990         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
991         s->pict_type, s->dropable);*/
992
993     if(s->codec_id != CODEC_ID_H264){
994         if((s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && s->pict_type!=FF_I_TYPE){
995             av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
996             /* Allocate a dummy frame */
997             i= ff_find_unused_picture(s, 0);
998             s->last_picture_ptr= &s->picture[i];
999             if(ff_alloc_picture(s, s->last_picture_ptr, 0) < 0)
1000                 return -1;
1001         }
1002         if((s->next_picture_ptr==NULL || s->next_picture_ptr->data[0]==NULL) && s->pict_type==FF_B_TYPE){
1003             /* Allocate a dummy frame */
1004             i= ff_find_unused_picture(s, 0);
1005             s->next_picture_ptr= &s->picture[i];
1006             if(ff_alloc_picture(s, s->next_picture_ptr, 0) < 0)
1007                 return -1;
1008         }
1009     }
1010
1011     if(s->last_picture_ptr) ff_copy_picture(&s->last_picture, s->last_picture_ptr);
1012     if(s->next_picture_ptr) ff_copy_picture(&s->next_picture, s->next_picture_ptr);
1013
1014     assert(s->pict_type == FF_I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1015
1016     if(s->picture_structure!=PICT_FRAME && s->out_format != FMT_H264){
1017         int i;
1018         for(i=0; i<4; i++){
1019             if(s->picture_structure == PICT_BOTTOM_FIELD){
1020                  s->current_picture.data[i] += s->current_picture.linesize[i];
1021             }
1022             s->current_picture.linesize[i] *= 2;
1023             s->last_picture.linesize[i] *=2;
1024             s->next_picture.linesize[i] *=2;
1025         }
1026     }
1027
1028     s->hurry_up= s->avctx->hurry_up;
1029     s->error_recognition= avctx->error_recognition;
1030
1031     /* set dequantizer, we can't do it during init as it might change for mpeg4
1032        and we can't do it in the header decode as init is not called for mpeg4 there yet */
1033     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1034         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1035         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1036     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1037         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1038         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1039     }else{
1040         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1041         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1042     }
1043
1044     if(s->dct_error_sum){
1045         assert(s->avctx->noise_reduction && s->encoding);
1046
1047         update_noise_reduction(s);
1048     }
1049
1050     if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration)
1051         return ff_xvmc_field_start(s, avctx);
1052
1053     return 0;
1054 }
1055
/**
 * Generic function for encode/decode, called after a frame has been
 * coded/decoded: pads picture edges, updates last-picture bookkeeping and
 * releases non-reference frames when encoding.
 */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
    //just to make sure that all data is rendered.
    if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){
        ff_xvmc_field_end(s);
    }else if(!s->avctx->hwaccel
       && !(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
       && s->unrestricted_mv
       && s->current_picture.reference
       && !s->intra_only
       && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            /* replicate the border pixels so unrestricted motion vectors can
               read outside the coded area (chroma uses half the luma width) */
            s->dsp.draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    /* leave the FPU in a usable state after any MMX code */
    emms_c();

    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=FF_B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                free_frame_buffer(s, &s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1108
1109 /**
1110  * draws an line from (ex, ey) -> (sx, sy).
1111  * @param w width of the image
1112  * @param h height of the image
1113  * @param stride stride/linesize of the image
1114  * @param color color of the arrow
1115  */
1116 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1117     int x, y, fr, f;
1118
1119     sx= av_clip(sx, 0, w-1);
1120     sy= av_clip(sy, 0, h-1);
1121     ex= av_clip(ex, 0, w-1);
1122     ey= av_clip(ey, 0, h-1);
1123
1124     buf[sy*stride + sx]+= color;
1125
1126     if(FFABS(ex - sx) > FFABS(ey - sy)){
1127         if(sx > ex){
1128             FFSWAP(int, sx, ex);
1129             FFSWAP(int, sy, ey);
1130         }
1131         buf+= sx + sy*stride;
1132         ex-= sx;
1133         f= ((ey-sy)<<16)/ex;
1134         for(x= 0; x <= ex; x++){
1135             y = (x*f)>>16;
1136             fr= (x*f)&0xFFFF;
1137             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1138             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1139         }
1140     }else{
1141         if(sy > ey){
1142             FFSWAP(int, sx, ex);
1143             FFSWAP(int, sy, ey);
1144         }
1145         buf+= sx + sy*stride;
1146         ey-= sy;
1147         if(ey) f= ((ex-sx)<<16)/ey;
1148         else   f= 0;
1149         for(y= 0; y <= ey; y++){
1150             x = (y*f)>>16;
1151             fr= (y*f)&0xFFFF;
1152             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1153             buf[y*stride + x+1]+= (color*         fr )>>16;
1154         }
1155     }
1156 }
1157
1158 /**
1159  * draws an arrow from (ex, ey) -> (sx, sy).
1160  * @param w width of the image
1161  * @param h height of the image
1162  * @param stride stride/linesize of the image
1163  * @param color color of the arrow
1164  */
1165 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1166     int dx,dy;
1167
1168     sx= av_clip(sx, -100, w+100);
1169     sy= av_clip(sy, -100, h+100);
1170     ex= av_clip(ex, -100, w+100);
1171     ey= av_clip(ey, -100, h+100);
1172
1173     dx= ex - sx;
1174     dy= ey - sy;
1175
1176     if(dx*dx + dy*dy > 3*3){
1177         int rx=  dx + dy;
1178         int ry= -dx + dy;
1179         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1180
1181         //FIXME subpixel accuracy
1182         rx= ROUNDED_DIV(rx*3<<4, length);
1183         ry= ROUNDED_DIV(ry*3<<4, length);
1184
1185         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1186         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1187     }
1188     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1189 }
1190
1191 /**
1192  * prints debuging info for the given picture.
1193  */
1194 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1195
1196     if(s->avctx->hwaccel || !pict || !pict->mb_type) return;
1197
1198     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1199         int x,y;
1200
1201         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1202         switch (pict->pict_type) {
1203             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1204             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1205             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1206             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1207             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1208             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1209         }
1210         for(y=0; y<s->mb_height; y++){
1211             for(x=0; x<s->mb_width; x++){
1212                 if(s->avctx->debug&FF_DEBUG_SKIP){
1213                     int count= s->mbskip_table[x + y*s->mb_stride];
1214                     if(count>9) count=9;
1215                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1216                 }
1217                 if(s->avctx->debug&FF_DEBUG_QP){
1218                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1219                 }
1220                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1221                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1222                     //Type & MV direction
1223                     if(IS_PCM(mb_type))
1224                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1225                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1226                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1227                     else if(IS_INTRA4x4(mb_type))
1228                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1229                     else if(IS_INTRA16x16(mb_type))
1230                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1231                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1232                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1233                     else if(IS_DIRECT(mb_type))
1234                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1235                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1236                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1237                     else if(IS_GMC(mb_type))
1238                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1239                     else if(IS_SKIP(mb_type))
1240                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1241                     else if(!USES_LIST(mb_type, 1))
1242                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1243                     else if(!USES_LIST(mb_type, 0))
1244                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1245                     else{
1246                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1247                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1248                     }
1249
1250                     //segmentation
1251                     if(IS_8X8(mb_type))
1252                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1253                     else if(IS_16X8(mb_type))
1254                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1255                     else if(IS_8X16(mb_type))
1256                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1257                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1258                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1259                     else
1260                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1261
1262
1263                     if(IS_INTERLACED(mb_type))
1264                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1265                     else
1266                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1267                 }
1268 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1269             }
1270             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1271         }
1272     }
1273
1274     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1275         const int shift= 1 + s->quarter_sample;
1276         int mb_y;
1277         uint8_t *ptr;
1278         int i;
1279         int h_chroma_shift, v_chroma_shift, block_height;
1280         const int width = s->avctx->width;
1281         const int height= s->avctx->height;
1282         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1283         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1284         s->low_delay=0; //needed to see the vectors without trashing the buffers
1285
1286         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1287         for(i=0; i<3; i++){
1288             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1289             pict->data[i]= s->visualization_buffer[i];
1290         }
1291         pict->type= FF_BUFFER_TYPE_COPY;
1292         ptr= pict->data[0];
1293         block_height = 16>>v_chroma_shift;
1294
1295         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1296             int mb_x;
1297             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1298                 const int mb_index= mb_x + mb_y*s->mb_stride;
1299                 if((s->avctx->debug_mv) && pict->motion_val){
1300                   int type;
1301                   for(type=0; type<3; type++){
1302                     int direction = 0;
1303                     switch (type) {
1304                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1305                                 continue;
1306                               direction = 0;
1307                               break;
1308                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1309                                 continue;
1310                               direction = 0;
1311                               break;
1312                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1313                                 continue;
1314                               direction = 1;
1315                               break;
1316                     }
1317                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1318                         continue;
1319
1320                     if(IS_8X8(pict->mb_type[mb_index])){
1321                       int i;
1322                       for(i=0; i<4; i++){
1323                         int sx= mb_x*16 + 4 + 8*(i&1);
1324                         int sy= mb_y*16 + 4 + 8*(i>>1);
1325                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1326                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1327                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1328                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1329                       }
1330                     }else if(IS_16X8(pict->mb_type[mb_index])){
1331                       int i;
1332                       for(i=0; i<2; i++){
1333                         int sx=mb_x*16 + 8;
1334                         int sy=mb_y*16 + 4 + 8*i;
1335                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1336                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1337                         int my=(pict->motion_val[direction][xy][1]>>shift);
1338
1339                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1340                             my*=2;
1341
1342                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1343                       }
1344                     }else if(IS_8X16(pict->mb_type[mb_index])){
1345                       int i;
1346                       for(i=0; i<2; i++){
1347                         int sx=mb_x*16 + 4 + 8*i;
1348                         int sy=mb_y*16 + 8;
1349                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1350                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1351                         int my=(pict->motion_val[direction][xy][1]>>shift);
1352
1353                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1354                             my*=2;
1355
1356                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1357                       }
1358                     }else{
1359                       int sx= mb_x*16 + 8;
1360                       int sy= mb_y*16 + 8;
1361                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1362                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1363                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1364                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1365                     }
1366                   }
1367                 }
1368                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1369                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1370                     int y;
1371                     for(y=0; y<block_height; y++){
1372                         *(uint64_t*)(pict->data[1] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[1])= c;
1373                         *(uint64_t*)(pict->data[2] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[2])= c;
1374                     }
1375                 }
1376                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1377                     int mb_type= pict->mb_type[mb_index];
1378                     uint64_t u,v;
1379                     int y;
1380 #define COLOR(theta, r)\
1381 u= (int)(128 + r*cos(theta*3.141592/180));\
1382 v= (int)(128 + r*sin(theta*3.141592/180));
1383
1384
1385                     u=v=128;
1386                     if(IS_PCM(mb_type)){
1387                         COLOR(120,48)
1388                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1389                         COLOR(30,48)
1390                     }else if(IS_INTRA4x4(mb_type)){
1391                         COLOR(90,48)
1392                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1393 //                        COLOR(120,48)
1394                     }else if(IS_DIRECT(mb_type)){
1395                         COLOR(150,48)
1396                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1397                         COLOR(170,48)
1398                     }else if(IS_GMC(mb_type)){
1399                         COLOR(190,48)
1400                     }else if(IS_SKIP(mb_type)){
1401 //                        COLOR(180,48)
1402                     }else if(!USES_LIST(mb_type, 1)){
1403                         COLOR(240,48)
1404                     }else if(!USES_LIST(mb_type, 0)){
1405                         COLOR(0,48)
1406                     }else{
1407                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1408                         COLOR(300,48)
1409                     }
1410
1411                     u*= 0x0101010101010101ULL;
1412                     v*= 0x0101010101010101ULL;
1413                     for(y=0; y<block_height; y++){
1414                         *(uint64_t*)(pict->data[1] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[1])= u;
1415                         *(uint64_t*)(pict->data[2] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[2])= v;
1416                     }
1417
1418                     //segmentation
1419                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1420                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1421                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1422                     }
1423                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1424                         for(y=0; y<16; y++)
1425                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1426                     }
1427                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
1428                         int dm= 1 << (mv_sample_log2-2);
1429                         for(i=0; i<4; i++){
1430                             int sx= mb_x*16 + 8*(i&1);
1431                             int sy= mb_y*16 + 8*(i>>1);
1432                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1433                             //FIXME bidir
1434                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
1435                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
1436                                 for(y=0; y<8; y++)
1437                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
1438                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
1439                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
1440                         }
1441                     }
1442
1443                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1444                         // hmm
1445                     }
1446                 }
1447                 s->mbskip_table[mb_index]=0;
1448             }
1449         }
1450     }
1451 }
1452
/**
 * Halfpel motion compensation for one block of one component in lowres mode.
 *
 * Converts the full-resolution motion vector into a lowres source position
 * plus a sub-pixel phase, falls back to edge emulation when the source block
 * reaches outside the padded picture, and applies the chroma MC function
 * matching the lowres shift.
 *
 * @return 1 if the edge-emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int op_index= FFMIN(lowres, 2);
    /* mask selecting the sub-pixel fraction of the vector at this lowres */
    const int s_mask= (2<<lowres)-1;
    int emu=0;
    int sx, sy;

    if(s->quarter_sample){
        motion_x/=2;
        motion_y/=2;
    }

    /* split the vector into integer position and sub-pixel phase */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    /* use the edge-emulation buffer when the block reads outside the edges */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    /* rescale the phase to the 1/8-pel range expected by pix_op */
    sx= (sx << 2) >> lowres;
    sy= (sy << 2) >> lowres;
    if(field_select)
        src += s->linesize;
    pix_op[op_index](dest, src, stride, h, sx, sy);
    return emu;
}
1495
/**
 * Apply one MPEG motion vector to all three components (lowres version).
 * Handles frame and field based prediction, per-codec chroma vector
 * derivation and picture-edge emulation.
 */
static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h, int mb_y)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int op_index= FFMIN(lowres, 2);   // MC function table has 3 entries
    const int block_s= 8>>lowres;           // block size after downscaling
    const int s_mask= (2<<lowres)-1;        // mask of the sub-pel fraction
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel will not work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        // compensate for the vertical sub-sampling offset between fields
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    // split the luma vector into integer position and sub-pel fraction
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(   mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    // derive the chroma vector; the rule differs per output format
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y =    mb_y*block_s               + (my >> lowres);
    } else {
        // MPEG-1/2 style: chroma vector is half the luma vector
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(   mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    // if the block reaches outside the picture, read via the emu buffer
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                // chroma shares the emu buffer, placed after the luma rows
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    // rescale sub-pel fractions to the range the MC functions expect
    sx= (sx << 2) >> lowres;
    sy= (sy << 2) >> lowres;
    // NOTE(review): index lowres-1 implies this is only reached with lowres >= 1
    // (MPV_decode_mb only takes the lowres path when avctx->lowres is set)
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        uvsx= (uvsx << 2) >> lowres;
        uvsy= (uvsy << 2) >> lowres;
        pix_op[op_index](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[op_index](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
1593
1594 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
1595                                      uint8_t *dest_cb, uint8_t *dest_cr,
1596                                      uint8_t **ref_picture,
1597                                      h264_chroma_mc_func *pix_op,
1598                                      int mx, int my){
1599     const int lowres= s->avctx->lowres;
1600     const int op_index= FFMIN(lowres, 2);
1601     const int block_s= 8>>lowres;
1602     const int s_mask= (2<<lowres)-1;
1603     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
1604     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
1605     int emu=0, src_x, src_y, offset, sx, sy;
1606     uint8_t *ptr;
1607
1608     if(s->quarter_sample){
1609         mx/=2;
1610         my/=2;
1611     }
1612
1613     /* In case of 8X8, we construct a single chroma motion vector
1614        with a special rounding */
1615     mx= ff_h263_round_chroma(mx);
1616     my= ff_h263_round_chroma(my);
1617
1618     sx= mx & s_mask;
1619     sy= my & s_mask;
1620     src_x = s->mb_x*block_s + (mx >> (lowres+1));
1621     src_y = s->mb_y*block_s + (my >> (lowres+1));
1622
1623     offset = src_y * s->uvlinesize + src_x;
1624     ptr = ref_picture[1] + offset;
1625     if(s->flags&CODEC_FLAG_EMU_EDGE){
1626         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
1627            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
1628             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
1629             ptr= s->edge_emu_buffer;
1630             emu=1;
1631         }
1632     }
1633     sx= (sx << 2) >> lowres;
1634     sy= (sy << 2) >> lowres;
1635     pix_op[op_index](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
1636
1637     ptr = ref_picture[2] + offset;
1638     if(emu){
1639         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
1640         ptr= s->edge_emu_buffer;
1641     }
1642     pix_op[op_index](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
1643 }
1644
/**
 * motion compensation of a single macroblock (lowres version)
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;   // block size after downscaling

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        // one vector covers the whole macroblock
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s, mb_y);
        break;
    case MV_TYPE_8X8:
        // four luma vectors; chroma uses a single vector built from their sum
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            // frame picture: predict each field separately
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s, mb_y);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s, mb_y);
        } else {
            // field picture may reference the opposite field of the current frame
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != FF_B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s, mb_y>>1);
        }
        break;
    case MV_TYPE_16X8:
        // two vectors, each covering the upper/lower half of the macroblock
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            // same-frame opposite-field references come from the current picture
            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == FF_B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s, mb_y>>1);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        // dual-prime style: put the first prediction, then average the second
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s, mb_y);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s, mb_y>>1);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
1770
1771 /* put block[] to dest[] */
1772 static inline void put_dct(MpegEncContext *s,
1773                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1774 {
1775     s->dct_unquantize_intra(s, block, i, qscale);
1776     s->dsp.idct_put (dest, line_size, block);
1777 }
1778
1779 /* add block[] to dest[] */
1780 static inline void add_dct(MpegEncContext *s,
1781                            DCTELEM *block, int i, uint8_t *dest, int line_size)
1782 {
1783     if (s->block_last_index[i] >= 0) {
1784         s->dsp.idct_add (dest, line_size, block);
1785     }
1786 }
1787
1788 static inline void add_dequant_dct(MpegEncContext *s,
1789                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1790 {
1791     if (s->block_last_index[i] >= 0) {
1792         s->dct_unquantize_inter(s, block, i, qscale);
1793
1794         s->dsp.idct_add (dest, line_size, block);
1795     }
1796 }
1797
1798 /**
1799  * cleans dc, ac, coded_block for the current non intra MB
1800  */
1801 void ff_clean_intra_table_entries(MpegEncContext *s)
1802 {
1803     int wrap = s->b8_stride;
1804     int xy = s->block_index[0];
1805
1806     s->dc_val[0][xy           ] =
1807     s->dc_val[0][xy + 1       ] =
1808     s->dc_val[0][xy     + wrap] =
1809     s->dc_val[0][xy + 1 + wrap] = 1024;
1810     /* ac pred */
1811     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
1812     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
1813     if (s->msmpeg4_version>=3) {
1814         s->coded_block[xy           ] =
1815         s->coded_block[xy + 1       ] =
1816         s->coded_block[xy     + wrap] =
1817         s->coded_block[xy + 1 + wrap] = 0;
1818     }
1819     /* chroma */
1820     wrap = s->mb_stride;
1821     xy = s->mb_x + s->mb_y * wrap;
1822     s->dc_val[1][xy] =
1823     s->dc_val[2][xy] = 1024;
1824     /* ac pred */
1825     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
1826     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
1827
1828     s->mbintra_table[xy]= 0;
1829 }
1830
/* generic function called after a macroblock has been parsed by the
   decoder or after it has been encoded by the encoder.

   Important variables used:
   s->mb_intra : true if intra macroblock
   s->mv_dir   : motion vector direction
   s->mv_type  : motion vector type
   s->mv       : motion vector
   s->interlaced_dct : true if interlaced dct used (mpeg2)

   lowres_flag and is_mpeg12 are compile-time template parameters (the
   function is av_always_inline and instantiated with constants from
   MPV_decode_mb), so dead branches are removed per specialization.
 */
static av_always_inline
void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
                            int lowres_flag, int is_mpeg12)
{
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
    /* hardware (XvMC) acceleration performs the whole MB reconstruction */
    if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){
        ff_xvmc_decode_mb(s);//xvmc uses pblocks
        return;
    }

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients (undoing the IDCT permutation) for debugging */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (!is_mpeg12 && (s->h263_pred || s->h263_aic)) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (!is_mpeg12 && (s->h263_pred || s->h263_aic))
        s->mbintra_table[mb_xy]=1;

    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==FF_B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != FF_B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=FF_I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* interlaced DCT interleaves the two fields: double stride, and
           the "lower" half starts one line (not block_size lines) down */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* non-referenced B-frame output goes to a scratch buffer first */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    /* forward prediction is "put"; a following backward
                       prediction is then averaged on top of it */
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    op_qpix= s->me.qpel_put;
                    if ((!s->no_rounding) || s->pict_type==FF_B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->me.qpel_avg;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == FF_B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != FF_I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            /* first path: codecs whose blocks still need dequantization here */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if (s->chroma_y_shift){
                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_linesize >>= 1;
                        dct_offset >>=1;
                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            } else if(is_mpeg12 || (s->codec_id != CODEC_ID_WMV2)){
                /* blocks arrive here already dequantized: add IDCT only */
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else if (CONFIG_WMV2_DECODER || CONFIG_WMV2_ENCODER) {
                /* WMV2 has its own block reconstruction */
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_offset >>=1;
                        dct_linesize >>=1;
                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            }else{
                /* NOTE(review): no dequant step for MPEG-1/2 decoding here —
                   presumably the blocks are dequantized during parsing; confirm */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        if(!readable){
            /* copy the macroblock from the scratchpad into the real picture */
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
2077
2078 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
2079 #if !CONFIG_SMALL
2080     if(s->out_format == FMT_MPEG1) {
2081         if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 1);
2082         else                 MPV_decode_mb_internal(s, block, 0, 1);
2083     } else
2084 #endif
2085     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 0);
2086     else                  MPV_decode_mb_internal(s, block, 0, 0);
2087 }
2088
2089 /**
2090  *
2091  * @param h is the normal height, this will be reduced automatically if needed for the last row
2092  */
2093 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2094     if (s->avctx->draw_horiz_band) {
2095         AVFrame *src;
2096         const int field_pic= s->picture_structure != PICT_FRAME;
2097         int offset[4];
2098
2099         h= FFMIN(h, (s->avctx->height>>field_pic) - y);
2100
2101         if(field_pic && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)){
2102             h <<= 1;
2103             y <<= 1;
2104             if(s->first_field) return;
2105         }
2106
2107         if(s->pict_type==FF_B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
2108             src= (AVFrame*)s->current_picture_ptr;
2109         else if(s->last_picture_ptr)
2110             src= (AVFrame*)s->last_picture_ptr;
2111         else
2112             return;
2113
2114         if(s->pict_type==FF_B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2115             offset[0]=
2116             offset[1]=
2117             offset[2]=
2118             offset[3]= 0;
2119         }else{
2120             offset[0]= y * s->linesize;
2121             offset[1]=
2122             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
2123             offset[3]= 0;
2124         }
2125
2126         emms_c();
2127
2128         s->avctx->draw_horiz_band(s->avctx, src, offset,
2129                                   y, s->picture_structure, h);
2130     }
2131 }
2132
2133 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
2134     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2135     const int uvlinesize= s->current_picture.linesize[1];
2136     const int mb_size= 4 - s->avctx->lowres;
2137
2138     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
2139     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
2140     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
2141     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
2142     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2143     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2144     //block_index is not used by mpeg2, so it is not affected by chroma_format
2145
2146     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
2147     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2148     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2149
2150     if(!(s->pict_type==FF_B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
2151     {
2152         if(s->picture_structure==PICT_FRAME){
2153         s->dest[0] += s->mb_y *   linesize << mb_size;
2154         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2155         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2156         }else{
2157             s->dest[0] += (s->mb_y>>1) *   linesize << mb_size;
2158             s->dest[1] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
2159             s->dest[2] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
2160             assert((s->mb_y&1) == (s->picture_structure == PICT_BOTTOM_FIELD));
2161         }
2162     }
2163 }
2164
2165 void ff_mpeg_flush(AVCodecContext *avctx){
2166     int i;
2167     MpegEncContext *s = avctx->priv_data;
2168
2169     if(s==NULL || s->picture==NULL)
2170         return;
2171
2172     for(i=0; i<MAX_PICTURE_COUNT; i++){
2173        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
2174                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
2175         free_frame_buffer(s, &s->picture[i]);
2176     }
2177     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
2178
2179     s->mb_x= s->mb_y= 0;
2180     s->closed_gop= 0;
2181
2182     s->parse_context.state= -1;
2183     s->parse_context.frame_start_found= 0;
2184     s->parse_context.overread= 0;
2185     s->parse_context.overread_index= 0;
2186     s->parse_context.index= 0;
2187     s->parse_context.last_index= 0;
2188     s->bitstream_buffer_size=0;
2189     s->pp_time=0;
2190 }
2191
2192 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
2193                                    DCTELEM *block, int n, int qscale)
2194 {
2195     int i, level, nCoeffs;
2196     const uint16_t *quant_matrix;
2197
2198     nCoeffs= s->block_last_index[n];
2199
2200     if (n < 4)
2201         block[0] = block[0] * s->y_dc_scale;
2202     else
2203         block[0] = block[0] * s->c_dc_scale;
2204     /* XXX: only mpeg1 */
2205     quant_matrix = s->intra_matrix;
2206     for(i=1;i<=nCoeffs;i++) {
2207         int j= s->intra_scantable.permutated[i];
2208         level = block[j];
2209         if (level) {
2210             if (level < 0) {
2211                 level = -level;
2212                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2213                 level = (level - 1) | 1;
2214                 level = -level;
2215             } else {
2216                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2217                 level = (level - 1) | 1;
2218             }
2219             block[j] = level;
2220         }
2221     }
2222 }
2223
2224 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
2225                                    DCTELEM *block, int n, int qscale)
2226 {
2227     int i, level, nCoeffs;
2228     const uint16_t *quant_matrix;
2229
2230     nCoeffs= s->block_last_index[n];
2231
2232     quant_matrix = s->inter_matrix;
2233     for(i=0; i<=nCoeffs; i++) {
2234         int j= s->intra_scantable.permutated[i];
2235         level = block[j];
2236         if (level) {
2237             if (level < 0) {
2238                 level = -level;
2239                 level = (((level << 1) + 1) * qscale *
2240                          ((int) (quant_matrix[j]))) >> 4;
2241                 level = (level - 1) | 1;
2242                 level = -level;
2243             } else {
2244                 level = (((level << 1) + 1) * qscale *
2245                          ((int) (quant_matrix[j]))) >> 4;
2246                 level = (level - 1) | 1;
2247             }
2248             block[j] = level;
2249         }
2250     }
2251 }
2252
2253 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
2254                                    DCTELEM *block, int n, int qscale)
2255 {
2256     int i, level, nCoeffs;
2257     const uint16_t *quant_matrix;
2258
2259     if(s->alternate_scan) nCoeffs= 63;
2260     else nCoeffs= s->block_last_index[n];
2261
2262     if (n < 4)
2263         block[0] = block[0] * s->y_dc_scale;
2264     else
2265         block[0] = block[0] * s->c_dc_scale;
2266     quant_matrix = s->intra_matrix;
2267     for(i=1;i<=nCoeffs;i++) {
2268         int j= s->intra_scantable.permutated[i];
2269         level = block[j];
2270         if (level) {
2271             if (level < 0) {
2272                 level = -level;
2273                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2274                 level = -level;
2275             } else {
2276                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2277             }
2278             block[j] = level;
2279         }
2280     }
2281 }
2282
2283 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
2284                                    DCTELEM *block, int n, int qscale)
2285 {
2286     int i, level, nCoeffs;
2287     const uint16_t *quant_matrix;
2288     int sum=-1;
2289
2290     if(s->alternate_scan) nCoeffs= 63;
2291     else nCoeffs= s->block_last_index[n];
2292
2293     if (n < 4)
2294         block[0] = block[0] * s->y_dc_scale;
2295     else
2296         block[0] = block[0] * s->c_dc_scale;
2297     quant_matrix = s->intra_matrix;
2298     for(i=1;i<=nCoeffs;i++) {
2299         int j= s->intra_scantable.permutated[i];
2300         level = block[j];
2301         if (level) {
2302             if (level < 0) {
2303                 level = -level;
2304                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2305                 level = -level;
2306             } else {
2307                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2308             }
2309             block[j] = level;
2310             sum+=level;
2311         }
2312     }
2313     block[63]^=sum&1;
2314 }
2315
2316 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
2317                                    DCTELEM *block, int n, int qscale)
2318 {
2319     int i, level, nCoeffs;
2320     const uint16_t *quant_matrix;
2321     int sum=-1;
2322
2323     if(s->alternate_scan) nCoeffs= 63;
2324     else nCoeffs= s->block_last_index[n];
2325
2326     quant_matrix = s->inter_matrix;
2327     for(i=0; i<=nCoeffs; i++) {
2328         int j= s->intra_scantable.permutated[i];
2329         level = block[j];
2330         if (level) {
2331             if (level < 0) {
2332                 level = -level;
2333                 level = (((level << 1) + 1) * qscale *
2334                          ((int) (quant_matrix[j]))) >> 4;
2335                 level = -level;
2336             } else {
2337                 level = (((level << 1) + 1) * qscale *
2338                          ((int) (quant_matrix[j]))) >> 4;
2339             }
2340             block[j] = level;
2341             sum+=level;
2342         }
2343     }
2344     block[63]^=sum&1;
2345 }
2346
2347 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
2348                                   DCTELEM *block, int n, int qscale)
2349 {
2350     int i, level, qmul, qadd;
2351     int nCoeffs;
2352
2353     assert(s->block_last_index[n]>=0);
2354
2355     qmul = qscale << 1;
2356
2357     if (!s->h263_aic) {
2358         if (n < 4)
2359             block[0] = block[0] * s->y_dc_scale;
2360         else
2361             block[0] = block[0] * s->c_dc_scale;
2362         qadd = (qscale - 1) | 1;
2363     }else{
2364         qadd = 0;
2365     }
2366     if(s->ac_pred)
2367         nCoeffs=63;
2368     else
2369         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2370
2371     for(i=1; i<=nCoeffs; i++) {
2372         level = block[i];
2373         if (level) {
2374             if (level < 0) {
2375                 level = level * qmul - qadd;
2376             } else {
2377                 level = level * qmul + qadd;
2378             }
2379             block[i] = level;
2380         }
2381     }
2382 }
2383
2384 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
2385                                   DCTELEM *block, int n, int qscale)
2386 {
2387     int i, level, qmul, qadd;
2388     int nCoeffs;
2389
2390     assert(s->block_last_index[n]>=0);
2391
2392     qadd = (qscale - 1) | 1;
2393     qmul = qscale << 1;
2394
2395     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2396
2397     for(i=0; i<=nCoeffs; i++) {
2398         level = block[i];
2399         if (level) {
2400             if (level < 0) {
2401                 level = level * qmul - qadd;
2402             } else {
2403                 level = level * qmul + qadd;
2404             }
2405             block[i] = level;
2406         }
2407     }
2408 }
2409
2410 /**
2411  * set qscale and update qscale dependent variables.
2412  */
2413 void ff_set_qscale(MpegEncContext * s, int qscale)
2414 {
2415     if (qscale < 1)
2416         qscale = 1;
2417     else if (qscale > 31)
2418         qscale = 31;
2419
2420     s->qscale = qscale;
2421     s->chroma_qscale= s->chroma_qscale_table[qscale];
2422
2423     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
2424     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
2425 }