1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/imgutils.h"
32 #include "avcodec.h"
33 #include "dsputil.h"
34 #include "internal.h"
35 #include "mpegvideo.h"
36 #include "mjpegenc.h"
37 #include "msmpeg4.h"
38 #include "xvmc_internal.h"
39 #include "thread.h"
40 #include <limits.h>
41
42 //#undef NDEBUG
43 //#include <assert.h>
44
45 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
46                                    DCTELEM *block, int n, int qscale);
47 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
48                                    DCTELEM *block, int n, int qscale);
49 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
50                                    DCTELEM *block, int n, int qscale);
51 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
52                                    DCTELEM *block, int n, int qscale);
53 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
54                                    DCTELEM *block, int n, int qscale);
55 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
56                                   DCTELEM *block, int n, int qscale);
57 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
58                                   DCTELEM *block, int n, int qscale);
59
60
61 /* enable all paranoid tests for rounding, overflows, etc... */
62 //#define PARANOID
63
64 //#define DEBUG
65
66
67 static const uint8_t ff_default_chroma_qscale_table[32] = {
68 //   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
69      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
70     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
71 };
72
73 const uint8_t ff_mpeg1_dc_scale_table[128] = {
74 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
75     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
76     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
77     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
78     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
79     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
80     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
81     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
82     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
83 };
84
85 static const uint8_t mpeg2_dc_scale_table1[128] = {
86 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
87     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
88     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
89     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
90     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
91     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
92     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
93     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
94     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
95 };
96
97 static const uint8_t mpeg2_dc_scale_table2[128] = {
98 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
99     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
103     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
104     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 };
108
109 static const uint8_t mpeg2_dc_scale_table3[128] = {
110 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
111     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
116     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
117     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
118     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
119 };
120
121 const uint8_t *const ff_mpeg2_dc_scale_table[4] = {
122     ff_mpeg1_dc_scale_table,
123     mpeg2_dc_scale_table1,
124     mpeg2_dc_scale_table2,
125     mpeg2_dc_scale_table3,
126 };
127
128 const enum PixelFormat ff_pixfmt_list_420[] = {
129     PIX_FMT_YUV420P,
130     PIX_FMT_NONE
131 };
132
133 const enum PixelFormat ff_hwaccel_pixfmt_list_420[] = {
134     PIX_FMT_DXVA2_VLD,
135     PIX_FMT_VAAPI_VLD,
136     PIX_FMT_VDA_VLD,
137     PIX_FMT_YUV420P,
138     PIX_FMT_NONE
139 };
140
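/**
 * Scan [p, end) for the next MPEG-style start code (0x000001xx).
 * *state carries the last bytes seen, so a start code split across two
 * calls/buffers is still detected; on success *state holds the full
 * 32-bit start code and the return value points just past it.
 * Returns end if no start code was found.
 */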
141 const uint8_t *avpriv_mpv_find_start_code(const uint8_t *restrict p,
142                                           const uint8_t *end,
143                                           uint32_t * restrict state)
144 {
145     int i;
146
147     assert(p <= end);
148     if (p >= end)
149         return end;
150
151     for (i = 0; i < 3; i++) {
152         uint32_t tmp = *state << 8;
153         *state = tmp + *(p++);
154         if (tmp == 0x100 || p == end)
155             return p;
156     }
157
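    /* Fast scan: each step tests whether a 00 00 01 prefix ends at p[-1].
     * If p[-1] > 1 it is neither 0x00 nor 0x01, so no prefix can end at
     * p[-1], p[0] or p[1] and three bytes are skipped; the other branches
     * skip two bytes or one byte by the same reasoning. */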
158     while (p < end) {
159         if      (p[-1] > 1      ) p += 3;
160         else if (p[-2]          ) p += 2;
161         else if (p[-3]|(p[-1]-1)) p++;
162         else {
163             p++;
164             break;
165         }
166     }
167
168     p = FFMIN(p, end) - 4;
169     *state = AV_RB32(p);
170
171     return p + 4;
172 }
173
174 /* init common dct for both encoder and decoder */
175 av_cold int ff_dct_common_init(MpegEncContext *s)
176 {
177     ff_dsputil_init(&s->dsp, s->avctx);
178
179     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
180     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
181     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
182     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
183     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
184     if (s->flags & CODEC_FLAG_BITEXACT)
185         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
186     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
187
188 #if HAVE_MMX
189     ff_MPV_common_init_mmx(s);
190 #elif ARCH_ALPHA
191     ff_MPV_common_init_axp(s);
192 #elif HAVE_MMI
193     ff_MPV_common_init_mmi(s);
194 #elif ARCH_ARM
195     ff_MPV_common_init_arm(s);
196 #elif HAVE_ALTIVEC
197     ff_MPV_common_init_altivec(s);
198 #elif ARCH_BFIN
199     ff_MPV_common_init_bfin(s);
200 #endif
201
 202     /* load & permute scantables
203      * note: only wmv uses different ones
204      */
205     if (s->alternate_scan) {
206         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
207         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
208     } else {
209         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
210         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
211     }
212     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
213     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
214
215     return 0;
216 }
217
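/**
 * Shallow-copy a Picture and mark the copy as FF_BUFFER_TYPE_COPY.
 */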
218 void ff_copy_picture(Picture *dst, Picture *src)
219 {
220     *dst = *src;
221     dst->f.type = FF_BUFFER_TYPE_COPY;
222 }
223
224 /**
225  * Release a frame buffer
226  */
227 static void free_frame_buffer(MpegEncContext *s, Picture *pic)
228 {
229     /* Windows Media Image codecs allocate internal buffers with different
230      * dimensions; ignore user defined callbacks for these
231      */
232     if (s->codec_id != AV_CODEC_ID_WMV3IMAGE && s->codec_id != AV_CODEC_ID_VC1IMAGE)
233         ff_thread_release_buffer(s->avctx, &pic->f);
234     else
235         avcodec_default_release_buffer(s->avctx, &pic->f);
236     av_freep(&pic->f.hwaccel_picture_private);
237 }
238
239 /**
240  * Allocate a frame buffer
241  */
242 static int alloc_frame_buffer(MpegEncContext *s, Picture *pic)
243 {
244     int r;
245
246     if (s->avctx->hwaccel) {
247         assert(!pic->f.hwaccel_picture_private);
248         if (s->avctx->hwaccel->priv_data_size) {
249             pic->f.hwaccel_picture_private = av_mallocz(s->avctx->hwaccel->priv_data_size);
250             if (!pic->f.hwaccel_picture_private) {
251                 av_log(s->avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n");
252                 return -1;
253             }
254         }
255     }
256
257     if (s->codec_id != AV_CODEC_ID_WMV3IMAGE && s->codec_id != AV_CODEC_ID_VC1IMAGE)
258         r = ff_thread_get_buffer(s->avctx, &pic->f);
259     else
260         r = avcodec_default_get_buffer(s->avctx, &pic->f);
261
262     if (r < 0 || !pic->f.type || !pic->f.data[0]) {
263         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %p)\n",
264                r, pic->f.type, pic->f.data[0]);
265         av_freep(&pic->f.hwaccel_picture_private);
266         return -1;
267     }
268
269     if (s->linesize && (s->linesize   != pic->f.linesize[0] ||
270                         s->uvlinesize != pic->f.linesize[1])) {
271         av_log(s->avctx, AV_LOG_ERROR,
272                "get_buffer() failed (stride changed)\n");
273         free_frame_buffer(s, pic);
274         return -1;
275     }
276
277     if (pic->f.linesize[1] != pic->f.linesize[2]) {
278         av_log(s->avctx, AV_LOG_ERROR,
279                "get_buffer() failed (uv stride mismatch)\n");
280         free_frame_buffer(s, pic);
281         return -1;
282     }
283
284     return 0;
285 }
286
287 /**
288  * Allocate a Picture.
289  * The pixels are allocated/set by calling get_buffer() if shared = 0
290  */
291 int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared)
292 {
293     const int big_mb_num = s->mb_stride * (s->mb_height + 1) + 1;
294
295     // the + 1 is needed so memset(,,stride*height) does not sig11
296
297     const int mb_array_size = s->mb_stride * s->mb_height;
298     const int b8_array_size = s->b8_stride * s->mb_height * 2;
299     const int b4_array_size = s->b4_stride * s->mb_height * 4;
300     int i;
301     int r = -1;
302
303     if (shared) {
304         assert(pic->f.data[0]);
305         assert(pic->f.type == 0 || pic->f.type == FF_BUFFER_TYPE_SHARED);
306         pic->f.type = FF_BUFFER_TYPE_SHARED;
307     } else {
308         assert(!pic->f.data[0]);
309
310         if (alloc_frame_buffer(s, pic) < 0)
311             return -1;
312
313         s->linesize   = pic->f.linesize[0];
314         s->uvlinesize = pic->f.linesize[1];
315     }
316
317     if (pic->f.qscale_table == NULL) {
318         if (s->encoding) {
319             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_var,
320                               mb_array_size * sizeof(int16_t), fail)
321             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mc_mb_var,
322                               mb_array_size * sizeof(int16_t), fail)
323             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_mean,
324                               mb_array_size * sizeof(int8_t ), fail)
325         }
326
327         FF_ALLOCZ_OR_GOTO(s->avctx, pic->f.mbskip_table,
 328                           mb_array_size * sizeof(uint8_t) + 2, fail) // the + 2 is for the slice end check
329         FF_ALLOCZ_OR_GOTO(s->avctx, pic->qscale_table_base,
330                           (big_mb_num + s->mb_stride) * sizeof(uint8_t),
331                           fail)
332         FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_type_base,
333                           (big_mb_num + s->mb_stride) * sizeof(uint32_t),
334                           fail)
335         pic->f.mb_type = pic->mb_type_base + 2 * s->mb_stride + 1;
336         pic->f.qscale_table = pic->qscale_table_base + 2 * s->mb_stride + 1;
337         if (s->out_format == FMT_H264) {
338             for (i = 0; i < 2; i++) {
339                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i],
340                                   2 * (b4_array_size + 4) * sizeof(int16_t),
341                                   fail)
342                 pic->f.motion_val[i] = pic->motion_val_base[i] + 4;
343                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->f.ref_index[i],
344                                   4 * mb_array_size * sizeof(uint8_t), fail)
345             }
346             pic->f.motion_subsample_log2 = 2;
347         } else if (s->out_format == FMT_H263 || s->encoding ||
348                    (s->avctx->debug & FF_DEBUG_MV) || s->avctx->debug_mv) {
349             for (i = 0; i < 2; i++) {
350                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i],
351                                   2 * (b8_array_size + 4) * sizeof(int16_t),
352                                   fail)
353                 pic->f.motion_val[i] = pic->motion_val_base[i] + 4;
354                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->f.ref_index[i],
355                                   4 * mb_array_size * sizeof(uint8_t), fail)
356             }
357             pic->f.motion_subsample_log2 = 3;
358         }
 359         if (s->avctx->debug & FF_DEBUG_DCT_COEFF) {
360             FF_ALLOCZ_OR_GOTO(s->avctx, pic->f.dct_coeff,
361                               64 * mb_array_size * sizeof(DCTELEM) * 6, fail)
362         }
363         pic->f.qstride = s->mb_stride;
364         FF_ALLOCZ_OR_GOTO(s->avctx, pic->f.pan_scan,
365                           1 * sizeof(AVPanScan), fail)
366     }
367
368     pic->owner2 = s;
369
370     return 0;
371 fail: // for  the FF_ALLOCZ_OR_GOTO macro
372     if (r >= 0)
373         free_frame_buffer(s, pic);
374     return -1;
375 }
376
377 /**
378  * Deallocate a picture.
379  */
380 static void free_picture(MpegEncContext *s, Picture *pic)
381 {
382     int i;
383
384     if (pic->f.data[0] && pic->f.type != FF_BUFFER_TYPE_SHARED) {
385         free_frame_buffer(s, pic);
386     }
387
388     av_freep(&pic->mb_var);
389     av_freep(&pic->mc_mb_var);
390     av_freep(&pic->mb_mean);
391     av_freep(&pic->f.mbskip_table);
392     av_freep(&pic->qscale_table_base);
393     av_freep(&pic->mb_type_base);
394     av_freep(&pic->f.dct_coeff);
395     av_freep(&pic->f.pan_scan);
396     pic->f.mb_type = NULL;
397     for (i = 0; i < 2; i++) {
398         av_freep(&pic->motion_val_base[i]);
399         av_freep(&pic->f.ref_index[i]);
400     }
401
402     if (pic->f.type == FF_BUFFER_TYPE_SHARED) {
403         for (i = 0; i < 4; i++) {
404             pic->f.base[i] =
405             pic->f.data[i] = NULL;
406         }
407         pic->f.type = 0;
408     }
409 }
410
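/**
 * Allocate the per-slice/per-thread scratch buffers: edge emulation
 * buffer, motion-estimation scratchpads, DCT blocks and, for FMT_H263,
 * the AC prediction values. Encoder-only tables (ME maps, noise-reduction
 * error sums) are allocated as well when s->encoding is set.
 */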
411 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base)
412 {
413     int y_size = s->b8_stride * (2 * s->mb_height + 1);
414     int c_size = s->mb_stride * (s->mb_height + 1);
415     int yc_size = y_size + 2 * c_size;
416     int i;
417
418     // edge emu needs blocksize + filter length - 1
419     // (= 17x17 for  halfpel / 21x21 for  h264)
420     FF_ALLOCZ_OR_GOTO(s->avctx, s->edge_emu_buffer,
421                       (s->width + 64) * 2 * 21 * 2, fail);    // (width + edge + align)*interlaced*MBsize*tolerance
422
423     // FIXME should be linesize instead of s->width * 2
424     // but that is not known before get_buffer()
425     FF_ALLOCZ_OR_GOTO(s->avctx, s->me.scratchpad,
426                       (s->width + 64) * 4 * 16 * 2 * sizeof(uint8_t), fail)
427     s->me.temp         = s->me.scratchpad;
428     s->rd_scratchpad   = s->me.scratchpad;
429     s->b_scratchpad    = s->me.scratchpad;
430     s->obmc_scratchpad = s->me.scratchpad + 16;
431     if (s->encoding) {
432         FF_ALLOCZ_OR_GOTO(s->avctx, s->me.map,
433                           ME_MAP_SIZE * sizeof(uint32_t), fail)
434         FF_ALLOCZ_OR_GOTO(s->avctx, s->me.score_map,
435                           ME_MAP_SIZE * sizeof(uint32_t), fail)
436         if (s->avctx->noise_reduction) {
437             FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_error_sum,
438                               2 * 64 * sizeof(int), fail)
439         }
440     }
441     FF_ALLOCZ_OR_GOTO(s->avctx, s->blocks, 64 * 12 * 2 * sizeof(DCTELEM), fail)
442     s->block = s->blocks[0];
443
444     for (i = 0; i < 12; i++) {
445         s->pblocks[i] = &s->block[i];
446     }
447
448     if (s->out_format == FMT_H263) {
449         /* ac values */
450         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_val_base,
451                           yc_size * sizeof(int16_t) * 16, fail);
452         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
453         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
454         s->ac_val[2] = s->ac_val[1] + c_size;
455     }
456
457     return 0;
458 fail:
459     return -1; // free() through ff_MPV_common_end()
460 }
461
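/**
 * Free everything allocated by init_duplicate_context().
 */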
462 static void free_duplicate_context(MpegEncContext *s)
463 {
464     if (s == NULL)
465         return;
466
467     av_freep(&s->edge_emu_buffer);
468     av_freep(&s->me.scratchpad);
469     s->me.temp =
470     s->rd_scratchpad =
471     s->b_scratchpad =
472     s->obmc_scratchpad = NULL;
473
474     av_freep(&s->dct_error_sum);
475     av_freep(&s->me.map);
476     av_freep(&s->me.score_map);
477     av_freep(&s->blocks);
478     av_freep(&s->ac_val_base);
479     s->block = NULL;
480 }
481
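/**
 * Save the per-thread pointers and state of src into bak so they can be
 * restored after a wholesale memcpy of the context
 * (see ff_update_duplicate_context()).
 */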
482 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src)
483 {
484 #define COPY(a) bak->a = src->a
485     COPY(edge_emu_buffer);
486     COPY(me.scratchpad);
487     COPY(me.temp);
488     COPY(rd_scratchpad);
489     COPY(b_scratchpad);
490     COPY(obmc_scratchpad);
491     COPY(me.map);
492     COPY(me.score_map);
493     COPY(blocks);
494     COPY(block);
495     COPY(start_mb_y);
496     COPY(end_mb_y);
497     COPY(me.map_generation);
498     COPY(pb);
499     COPY(dct_error_sum);
500     COPY(dct_count[0]);
501     COPY(dct_count[1]);
502     COPY(ac_val_base);
503     COPY(ac_val[0]);
504     COPY(ac_val[1]);
505     COPY(ac_val[2]);
506 #undef COPY
507 }
508
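/**
 * Overwrite dst with src while keeping dst's own scratch buffers,
 * bit writer (pb), slice range and noise-reduction statistics, then
 * re-point pblocks[] at dst's block array.
 */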
509 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src)
510 {
511     MpegEncContext bak;
512     int i;
513     // FIXME copy only needed parts
514     // START_TIMER
515     backup_duplicate_context(&bak, dst);
516     memcpy(dst, src, sizeof(MpegEncContext));
517     backup_duplicate_context(dst, &bak);
518     for (i = 0; i < 12; i++) {
519         dst->pblocks[i] = &dst->block[i];
520     }
521     // STOP_TIMER("update_duplicate_context")
 522     // about 10k cycles / 0.01 sec for 1000 frames on a 1 GHz CPU with 2 threads
523 }
524
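/**
 * Sync a frame-threading decoder context with that of the previous frame:
 * picture lists, timing, interlacing and buffered bitstream data are
 * carried over, and the context is lazily initialized on first use.
 */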
525 int ff_mpeg_update_thread_context(AVCodecContext *dst,
526                                   const AVCodecContext *src)
527 {
528     MpegEncContext *s = dst->priv_data, *s1 = src->priv_data;
529
530     if (dst == src || !s1->context_initialized)
531         return 0;
532
533     // FIXME can parameters change on I-frames?
534     // in that case dst may need a reinit
535     if (!s->context_initialized) {
536         memcpy(s, s1, sizeof(MpegEncContext));
537
538         s->avctx                 = dst;
539         s->picture_range_start  += MAX_PICTURE_COUNT;
540         s->picture_range_end    += MAX_PICTURE_COUNT;
541         s->bitstream_buffer      = NULL;
542         s->bitstream_buffer_size = s->allocated_bitstream_buffer_size = 0;
543
544         ff_MPV_common_init(s);
545     }
546
547     s->avctx->coded_height  = s1->avctx->coded_height;
548     s->avctx->coded_width   = s1->avctx->coded_width;
549     s->avctx->width         = s1->avctx->width;
550     s->avctx->height        = s1->avctx->height;
551
552     s->coded_picture_number = s1->coded_picture_number;
553     s->picture_number       = s1->picture_number;
554     s->input_picture_number = s1->input_picture_number;
555
556     memcpy(s->picture, s1->picture, s1->picture_count * sizeof(Picture));
557     memcpy(&s->last_picture, &s1->last_picture,
558            (char *) &s1->last_picture_ptr - (char *) &s1->last_picture);
559
560     s->last_picture_ptr    = REBASE_PICTURE(s1->last_picture_ptr,    s, s1);
561     s->current_picture_ptr = REBASE_PICTURE(s1->current_picture_ptr, s, s1);
562     s->next_picture_ptr    = REBASE_PICTURE(s1->next_picture_ptr,    s, s1);
563
564     // Error/bug resilience
565     s->next_p_frame_damaged = s1->next_p_frame_damaged;
566     s->workaround_bugs      = s1->workaround_bugs;
567
568     // MPEG4 timing info
569     memcpy(&s->time_increment_bits, &s1->time_increment_bits,
570            (char *) &s1->shape - (char *) &s1->time_increment_bits);
571
572     // B-frame info
573     s->max_b_frames = s1->max_b_frames;
574     s->low_delay    = s1->low_delay;
575     s->dropable     = s1->dropable;
576
577     // DivX handling (doesn't work)
578     s->divx_packed  = s1->divx_packed;
579
580     if (s1->bitstream_buffer) {
581         if (s1->bitstream_buffer_size +
582             FF_INPUT_BUFFER_PADDING_SIZE > s->allocated_bitstream_buffer_size)
583             av_fast_malloc(&s->bitstream_buffer,
584                            &s->allocated_bitstream_buffer_size,
585                            s1->allocated_bitstream_buffer_size);
 586         s->bitstream_buffer_size = s1->bitstream_buffer_size;
587         memcpy(s->bitstream_buffer, s1->bitstream_buffer,
588                s1->bitstream_buffer_size);
589         memset(s->bitstream_buffer + s->bitstream_buffer_size, 0,
590                FF_INPUT_BUFFER_PADDING_SIZE);
591     }
592
593     // MPEG2/interlacing info
594     memcpy(&s->progressive_sequence, &s1->progressive_sequence,
595            (char *) &s1->rtp_mode - (char *) &s1->progressive_sequence);
596
597     if (!s1->first_field) {
598         s->last_pict_type = s1->pict_type;
599         if (s1->current_picture_ptr)
600             s->last_lambda_for[s1->pict_type] = s1->current_picture_ptr->f.quality;
601
602         if (s1->pict_type != AV_PICTURE_TYPE_B) {
603             s->last_non_b_pict_type = s1->pict_type;
604         }
605     }
606
607     return 0;
608 }
609
610 /**
611  * Set the given MpegEncContext to common defaults
612  * (same for encoding and decoding).
613  * The changed fields will not depend upon the
614  * prior state of the MpegEncContext.
615  */
616 void ff_MPV_common_defaults(MpegEncContext *s)
617 {
618     s->y_dc_scale_table      =
619     s->c_dc_scale_table      = ff_mpeg1_dc_scale_table;
620     s->chroma_qscale_table   = ff_default_chroma_qscale_table;
621     s->progressive_frame     = 1;
622     s->progressive_sequence  = 1;
623     s->picture_structure     = PICT_FRAME;
624
625     s->coded_picture_number  = 0;
626     s->picture_number        = 0;
627     s->input_picture_number  = 0;
628
629     s->picture_in_gop_number = 0;
630
631     s->f_code                = 1;
632     s->b_code                = 1;
633
634     s->picture_range_start   = 0;
635     s->picture_range_end     = MAX_PICTURE_COUNT;
636
637     s->slice_context_count   = 1;
638 }
639
640 /**
641  * Set the given MpegEncContext to defaults for decoding.
642  * the changed fields will not depend upon
643  * the prior state of the MpegEncContext.
644  */
645 void ff_MPV_decode_defaults(MpegEncContext *s)
646 {
647     ff_MPV_common_defaults(s);
648 }
649
650 /**
651  * init common structure for both encoder and decoder.
652  * this assumes that some variables like width/height are already set
653  */
654 av_cold int ff_MPV_common_init(MpegEncContext *s)
655 {
656     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
657     int nb_slices = (HAVE_THREADS &&
658                      s->avctx->active_thread_type & FF_THREAD_SLICE) ?
659                     s->avctx->thread_count : 1;
660
661     if (s->encoding && s->avctx->slices)
662         nb_slices = s->avctx->slices;
663
664     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO && !s->progressive_sequence)
665         s->mb_height = (s->height + 31) / 32 * 2;
666     else if (s->codec_id != AV_CODEC_ID_H264)
667         s->mb_height = (s->height + 15) / 16;
668
669     if (s->avctx->pix_fmt == PIX_FMT_NONE) {
670         av_log(s->avctx, AV_LOG_ERROR,
671                "decoding to PIX_FMT_NONE is not supported.\n");
672         return -1;
673     }
674
675     if (nb_slices > MAX_THREADS || (nb_slices > s->mb_height && s->mb_height)) {
676         int max_slices;
677         if (s->mb_height)
678             max_slices = FFMIN(MAX_THREADS, s->mb_height);
679         else
680             max_slices = MAX_THREADS;
681         av_log(s->avctx, AV_LOG_WARNING, "too many threads/slices (%d),"
682                " reducing to %d\n", nb_slices, max_slices);
683         nb_slices = max_slices;
684     }
685
686     if ((s->width || s->height) &&
687         av_image_check_size(s->width, s->height, 0, s->avctx))
688         return -1;
689
690     ff_dct_common_init(s);
691
692     s->flags  = s->avctx->flags;
693     s->flags2 = s->avctx->flags2;
694
695     if (s->width && s->height) {
696         s->mb_width   = (s->width + 15) / 16;
697         s->mb_stride  = s->mb_width + 1;
698         s->b8_stride  = s->mb_width * 2 + 1;
699         s->b4_stride  = s->mb_width * 4 + 1;
700         mb_array_size = s->mb_height * s->mb_stride;
701         mv_table_size = (s->mb_height + 2) * s->mb_stride + 1;
702
703         /* set chroma shifts */
704         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &s->chroma_x_shift,
705                                       &s->chroma_y_shift);
706
 707         /* set default edge pos, will be overridden
708          * in decode_header if needed */
709         s->h_edge_pos = s->mb_width * 16;
710         s->v_edge_pos = s->mb_height * 16;
711
712         s->mb_num     = s->mb_width * s->mb_height;
713
714         s->block_wrap[0] =
715         s->block_wrap[1] =
716         s->block_wrap[2] =
717         s->block_wrap[3] = s->b8_stride;
718         s->block_wrap[4] =
719         s->block_wrap[5] = s->mb_stride;
720
721         y_size  = s->b8_stride * (2 * s->mb_height + 1);
722         c_size  = s->mb_stride * (s->mb_height + 1);
723         yc_size = y_size + 2   * c_size;
724
725         /* convert fourcc to upper case */
726         s->codec_tag          = avpriv_toupper4(s->avctx->codec_tag);
727
728         s->stream_codec_tag   = avpriv_toupper4(s->avctx->stream_codec_tag);
729
730         s->avctx->coded_frame = &s->current_picture.f;
731
732         FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_index2xy, (s->mb_num + 1) * sizeof(int),
 733                           fail); // error resilience code looks cleaner with this
734         for (y = 0; y < s->mb_height; y++)
735             for (x = 0; x < s->mb_width; x++)
736                 s->mb_index2xy[x + y * s->mb_width] = x + y * s->mb_stride;
737
738         s->mb_index2xy[s->mb_height * s->mb_width] =
739                        (s->mb_height - 1) * s->mb_stride + s->mb_width; // FIXME really needed?
740
741         if (s->encoding) {
742             /* Allocate MV tables */
743             FF_ALLOCZ_OR_GOTO(s->avctx, s->p_mv_table_base,
744                               mv_table_size * 2 * sizeof(int16_t), fail);
745             FF_ALLOCZ_OR_GOTO(s->avctx, s->b_forw_mv_table_base,
746                               mv_table_size * 2 * sizeof(int16_t), fail);
747             FF_ALLOCZ_OR_GOTO(s->avctx, s->b_back_mv_table_base,
748                               mv_table_size * 2 * sizeof(int16_t), fail);
749             FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_forw_mv_table_base,
750                               mv_table_size * 2 * sizeof(int16_t), fail);
751             FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_back_mv_table_base,
752                               mv_table_size * 2 * sizeof(int16_t), fail);
753             FF_ALLOCZ_OR_GOTO(s->avctx, s->b_direct_mv_table_base,
754                               mv_table_size * 2 * sizeof(int16_t), fail);
755             s->p_mv_table            = s->p_mv_table_base +
756                                        s->mb_stride + 1;
757             s->b_forw_mv_table       = s->b_forw_mv_table_base +
758                                        s->mb_stride + 1;
759             s->b_back_mv_table       = s->b_back_mv_table_base +
760                                        s->mb_stride + 1;
761             s->b_bidir_forw_mv_table = s->b_bidir_forw_mv_table_base +
762                                        s->mb_stride + 1;
763             s->b_bidir_back_mv_table = s->b_bidir_back_mv_table_base +
764                                        s->mb_stride + 1;
765             s->b_direct_mv_table     = s->b_direct_mv_table_base +
766                                        s->mb_stride + 1;
767
768             if (s->msmpeg4_version) {
769                 FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
770                                   2 * 2 * (MAX_LEVEL + 1) *
771                                   (MAX_RUN + 1) * 2 * sizeof(int), fail);
772             }
773             FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
774
775             /* Allocate MB type table */
776             FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_type, mb_array_size *
777                               sizeof(uint16_t), fail); // needed for encoding
778
779             FF_ALLOCZ_OR_GOTO(s->avctx, s->lambda_table, mb_array_size *
780                               sizeof(int), fail);
781
782             FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,
783                               64 * 32   * sizeof(int), fail);
784             FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,
785                               64 * 32   * sizeof(int), fail);
786             FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16,
787                               64 * 32 * 2 * sizeof(uint16_t), fail);
788             FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16,
789                               64 * 32 * 2 * sizeof(uint16_t), fail);
790             FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
791                               MAX_PICTURE_COUNT * sizeof(Picture *), fail);
792             FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
793                               MAX_PICTURE_COUNT * sizeof(Picture *), fail);
794
795             if (s->avctx->noise_reduction) {
796                 FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
797                                   2 * 64 * sizeof(uint16_t), fail);
798             }
799
800             FF_ALLOC_OR_GOTO(s->avctx, s->cplx_tab,
801                              mb_array_size * sizeof(float), fail);
802             FF_ALLOC_OR_GOTO(s->avctx, s->bits_tab,
803                              mb_array_size * sizeof(float), fail);
804         }
805     }
806
807     s->picture_count = MAX_PICTURE_COUNT * FFMAX(1, s->avctx->thread_count);
808     FF_ALLOCZ_OR_GOTO(s->avctx, s->picture,
809                       s->picture_count * sizeof(Picture), fail);
810     for (i = 0; i < s->picture_count; i++) {
811         avcodec_get_frame_defaults(&s->picture[i].f);
812     }
813
814     if (s->width && s->height) {
815         FF_ALLOC_OR_GOTO(s->avctx, s->er_temp_buffer,
816                          mb_array_size * sizeof(uint8_t), fail);
817         FF_ALLOCZ_OR_GOTO(s->avctx, s->error_status_table,
818                           mb_array_size * sizeof(uint8_t), fail);
819
820         if (s->codec_id == AV_CODEC_ID_MPEG4 ||
821             (s->flags & CODEC_FLAG_INTERLACED_ME)) {
822             /* interlaced direct mode decoding tables */
823             for (i = 0; i < 2; i++) {
824                 int j, k;
825                 for (j = 0; j < 2; j++) {
826                     for (k = 0; k < 2; k++) {
827                         FF_ALLOCZ_OR_GOTO(s->avctx,
828                                           s->b_field_mv_table_base[i][j][k],
829                                           mv_table_size * 2 * sizeof(int16_t),
830                                           fail);
831                         s->b_field_mv_table[i][j][k] = s->b_field_mv_table_base[i][j][k] +
832                                                        s->mb_stride + 1;
833                     }
834                     FF_ALLOCZ_OR_GOTO(s->avctx, s->b_field_select_table [i][j],
835                                       mb_array_size * 2 * sizeof(uint8_t),
836                                       fail);
837                     FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_mv_table_base[i][j],
838                                       mv_table_size * 2 * sizeof(int16_t),
839                                       fail);
840                     s->p_field_mv_table[i][j] = s->p_field_mv_table_base[i][j]
841                                                 + s->mb_stride + 1;
842                 }
843                 FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_select_table[i],
844                                   mb_array_size * 2 * sizeof(uint8_t),
845                                   fail);
846             }
847         }
848         if (s->out_format == FMT_H263) {
849             /* cbp values */
850             FF_ALLOCZ_OR_GOTO(s->avctx, s->coded_block_base, y_size, fail);
851             s->coded_block = s->coded_block_base + s->b8_stride + 1;
852
853             /* cbp, ac_pred, pred_dir */
854             FF_ALLOCZ_OR_GOTO(s->avctx, s->cbp_table,
855                               mb_array_size * sizeof(uint8_t), fail);
856             FF_ALLOCZ_OR_GOTO(s->avctx, s->pred_dir_table,
857                               mb_array_size * sizeof(uint8_t), fail);
858         }
859
860         if (s->h263_pred || s->h263_plus || !s->encoding) {
861             /* dc values */
862             // MN: we need these for  error resilience of intra-frames
863             FF_ALLOCZ_OR_GOTO(s->avctx, s->dc_val_base,
864                               yc_size * sizeof(int16_t), fail);
865             s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
866             s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
867             s->dc_val[2] = s->dc_val[1] + c_size;
868             for (i = 0; i < yc_size; i++)
869                 s->dc_val_base[i] = 1024;
870         }
871
 872         /* which mb is an intra block */
873         FF_ALLOCZ_OR_GOTO(s->avctx, s->mbintra_table, mb_array_size, fail);
874         memset(s->mbintra_table, 1, mb_array_size);
875
876         /* init macroblock skip table */
877         FF_ALLOCZ_OR_GOTO(s->avctx, s->mbskip_table, mb_array_size + 2, fail);
 878         // Note the + 2 is for a quicker mpeg4 slice_end detection
879
880         s->parse_context.state = -1;
881         if ((s->avctx->debug & (FF_DEBUG_VIS_QP | FF_DEBUG_VIS_MB_TYPE)) ||
882             s->avctx->debug_mv) {
883             s->visualization_buffer[0] = av_malloc((s->mb_width * 16 +
884                         2 * EDGE_WIDTH) * s->mb_height * 16 + 2 * EDGE_WIDTH);
885             s->visualization_buffer[1] = av_malloc((s->mb_width * 16 +
886                         2 * EDGE_WIDTH) * s->mb_height * 16 + 2 * EDGE_WIDTH);
887             s->visualization_buffer[2] = av_malloc((s->mb_width * 16 +
888                         2 * EDGE_WIDTH) * s->mb_height * 16 + 2 * EDGE_WIDTH);
889         }
890     }
891
892     s->context_initialized = 1;
893     s->thread_context[0]   = s;
894
895     if (s->width && s->height) {
896         if (nb_slices > 1) {
897             for (i = 1; i < nb_slices; i++) {
898                 s->thread_context[i] = av_malloc(sizeof(MpegEncContext));
899                 memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
900             }
901
902             for (i = 0; i < nb_slices; i++) {
903                 if (init_duplicate_context(s->thread_context[i], s) < 0)
904                     goto fail;
 905                 s->thread_context[i]->start_mb_y =
 906                     (s->mb_height * (i) + nb_slices / 2) / nb_slices;
 907                 s->thread_context[i]->end_mb_y   =
 908                     (s->mb_height * (i + 1) + nb_slices / 2) / nb_slices;
909             }
910         } else {
911             if (init_duplicate_context(s, s) < 0)
912                 goto fail;
913             s->start_mb_y = 0;
914             s->end_mb_y   = s->mb_height;
915         }
916         s->slice_context_count = nb_slices;
917     }
918
919     return 0;
920  fail:
921     ff_MPV_common_end(s);
922     return -1;
923 }
924
925 /* init common structure for both encoder and decoder */
926 void ff_MPV_common_end(MpegEncContext *s)
927 {
928     int i, j, k;
929
930     if (s->slice_context_count > 1) {
931         for (i = 0; i < s->slice_context_count; i++) {
932             free_duplicate_context(s->thread_context[i]);
933         }
934         for (i = 1; i < s->slice_context_count; i++) {
935             av_freep(&s->thread_context[i]);
936         }
937         s->slice_context_count = 1;
938     } else free_duplicate_context(s);
939
940     av_freep(&s->parse_context.buffer);
941     s->parse_context.buffer_size = 0;
942
943     av_freep(&s->mb_type);
944     av_freep(&s->p_mv_table_base);
945     av_freep(&s->b_forw_mv_table_base);
946     av_freep(&s->b_back_mv_table_base);
947     av_freep(&s->b_bidir_forw_mv_table_base);
948     av_freep(&s->b_bidir_back_mv_table_base);
949     av_freep(&s->b_direct_mv_table_base);
950     s->p_mv_table            = NULL;
951     s->b_forw_mv_table       = NULL;
952     s->b_back_mv_table       = NULL;
953     s->b_bidir_forw_mv_table = NULL;
954     s->b_bidir_back_mv_table = NULL;
955     s->b_direct_mv_table     = NULL;
956     for (i = 0; i < 2; i++) {
957         for (j = 0; j < 2; j++) {
958             for (k = 0; k < 2; k++) {
959                 av_freep(&s->b_field_mv_table_base[i][j][k]);
960                 s->b_field_mv_table[i][j][k] = NULL;
961             }
962             av_freep(&s->b_field_select_table[i][j]);
963             av_freep(&s->p_field_mv_table_base[i][j]);
964             s->p_field_mv_table[i][j] = NULL;
965         }
966         av_freep(&s->p_field_select_table[i]);
967     }
968
969     av_freep(&s->dc_val_base);
970     av_freep(&s->coded_block_base);
971     av_freep(&s->mbintra_table);
972     av_freep(&s->cbp_table);
973     av_freep(&s->pred_dir_table);
974
975     av_freep(&s->mbskip_table);
976     av_freep(&s->bitstream_buffer);
977     s->allocated_bitstream_buffer_size = 0;
978
979     av_freep(&s->avctx->stats_out);
980     av_freep(&s->ac_stats);
981     av_freep(&s->error_status_table);
982     av_freep(&s->er_temp_buffer);
983     av_freep(&s->mb_index2xy);
984     av_freep(&s->lambda_table);
985     av_freep(&s->q_intra_matrix);
986     av_freep(&s->q_inter_matrix);
987     av_freep(&s->q_intra_matrix16);
988     av_freep(&s->q_inter_matrix16);
989     av_freep(&s->input_picture);
990     av_freep(&s->reordered_input_picture);
991     av_freep(&s->dct_offset);
992     av_freep(&s->cplx_tab);
993     av_freep(&s->bits_tab);
994
995     if (s->picture && !s->avctx->internal->is_copy) {
996         for (i = 0; i < s->picture_count; i++) {
997             free_picture(s, &s->picture[i]);
998         }
999     }
1000     av_freep(&s->picture);
1001     s->context_initialized      = 0;
1002     s->last_picture_ptr         =
1003     s->next_picture_ptr         =
1004     s->current_picture_ptr      = NULL;
1005     s->linesize = s->uvlinesize = 0;
1006
1007     for (i = 0; i < 3; i++)
1008         av_freep(&s->visualization_buffer[i]);
1009
1010     if (!(s->avctx->active_thread_type & FF_THREAD_FRAME))
1011         avcodec_default_free_buffers(s->avctx);
1012 }
1013
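/**
 * Build the derived run/level tables (max_level[], max_run[], index_run[])
 * for an RLTable, storing them in static_store if given or in
 * av_malloc()ed buffers otherwise.
 */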
1014 void ff_init_rl(RLTable *rl,
1015                 uint8_t static_store[2][2 * MAX_RUN + MAX_LEVEL + 3])
1016 {
1017     int8_t  max_level[MAX_RUN + 1], max_run[MAX_LEVEL + 1];
1018     uint8_t index_run[MAX_RUN + 1];
1019     int last, run, level, start, end, i;
1020
1021     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1022     if (static_store && rl->max_level[0])
1023         return;
1024
1025     /* compute max_level[], max_run[] and index_run[] */
1026     for (last = 0; last < 2; last++) {
1027         if (last == 0) {
1028             start = 0;
1029             end = rl->last;
1030         } else {
1031             start = rl->last;
1032             end = rl->n;
1033         }
1034
1035         memset(max_level, 0, MAX_RUN + 1);
1036         memset(max_run, 0, MAX_LEVEL + 1);
1037         memset(index_run, rl->n, MAX_RUN + 1);
1038         for (i = start; i < end; i++) {
1039             run   = rl->table_run[i];
1040             level = rl->table_level[i];
1041             if (index_run[run] == rl->n)
1042                 index_run[run] = i;
1043             if (level > max_level[run])
1044                 max_level[run] = level;
1045             if (run > max_run[level])
1046                 max_run[level] = run;
1047         }
1048         if (static_store)
1049             rl->max_level[last] = static_store[last];
1050         else
1051             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1052         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1053         if (static_store)
1054             rl->max_run[last]   = static_store[last] + MAX_RUN + 1;
1055         else
1056             rl->max_run[last]   = av_malloc(MAX_LEVEL + 1);
1057         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1058         if (static_store)
1059             rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
1060         else
1061             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1062         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1063     }
1064 }
1065
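/**
 * Precompute rl_vlc[q] for every qscale 0..31 so that run, length and an
 * already dequantized level (table_level * qmul + qadd) can be read with
 * a single VLC lookup during decoding.
 */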
1066 void ff_init_vlc_rl(RLTable *rl)
1067 {
1068     int i, q;
1069
1070     for (q = 0; q < 32; q++) {
1071         int qmul = q * 2;
1072         int qadd = (q - 1) | 1;
1073
1074         if (q == 0) {
1075             qmul = 1;
1076             qadd = 0;
1077         }
1078         for (i = 0; i < rl->vlc.table_size; i++) {
1079             int code = rl->vlc.table[i][0];
1080             int len  = rl->vlc.table[i][1];
1081             int level, run;
1082
1083             if (len == 0) { // illegal code
1084                 run   = 66;
1085                 level = MAX_LEVEL;
1086             } else if (len < 0) { // more bits needed
1087                 run   = 0;
1088                 level = code;
1089             } else {
1090                 if (code == rl->n) { // esc
1091                     run   = 66;
1092                     level =  0;
1093                 } else {
1094                     run   = rl->table_run[code] + 1;
1095                     level = rl->table_level[code] * qmul + qadd;
1096                     if (code >= rl->last) run += 192;
1097                 }
1098             }
1099             rl->rl_vlc[q][i].len   = len;
1100             rl->rl_vlc[q][i].level = level;
1101             rl->rl_vlc[q][i].run   = run;
1102         }
1103     }
1104 }
1105
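/**
 * Release the frame buffers of all non-reference pictures that belong to
 * this context; s->current_picture_ptr is kept unless remove_current is set.
 */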
1106 void ff_release_unused_pictures(MpegEncContext*s, int remove_current)
1107 {
1108     int i;
1109
1110     /* release non reference frames */
1111     for (i = 0; i < s->picture_count; i++) {
1112         if (s->picture[i].f.data[0] && !s->picture[i].f.reference &&
1113             (!s->picture[i].owner2 || s->picture[i].owner2 == s) &&
1114             (remove_current || &s->picture[i] !=  s->current_picture_ptr)
1115             /* && s->picture[i].type!= FF_BUFFER_TYPE_SHARED */) {
1116             free_frame_buffer(s, &s->picture[i]);
1117         }
1118     }
1119 }
1120
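/**
 * Find a free slot in s->picture[] within the configured picture range.
 * When shared, only slots with no data and type 0 qualify; otherwise any
 * slot without allocated data is accepted, preferring ones whose buffer
 * type is still set. Returns the index, or AVERROR_INVALIDDATA if every
 * slot is in use.
 */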
1121 int ff_find_unused_picture(MpegEncContext *s, int shared)
1122 {
1123     int i;
1124
1125     if (shared) {
1126         for (i = s->picture_range_start; i < s->picture_range_end; i++) {
1127             if (s->picture[i].f.data[0] == NULL && s->picture[i].f.type == 0)
1128                 return i;
1129         }
1130     } else {
1131         for (i = s->picture_range_start; i < s->picture_range_end; i++) {
1132             if (s->picture[i].f.data[0] == NULL && s->picture[i].f.type != 0)
1133                 return i; // FIXME
1134         }
1135         for (i = s->picture_range_start; i < s->picture_range_end; i++) {
1136             if (s->picture[i].f.data[0] == NULL)
1137                 return i;
1138         }
1139     }
1140
1141     return AVERROR_INVALIDDATA;
1142 }
1143
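/**
 * Recompute the encoder's DCT offset table from the accumulated
 * per-coefficient error sums, halving the statistics once the block count
 * exceeds 2^16 so that old frames are gradually forgotten.
 */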
1144 static void update_noise_reduction(MpegEncContext *s)
1145 {
1146     int intra, i;
1147
1148     for (intra = 0; intra < 2; intra++) {
1149         if (s->dct_count[intra] > (1 << 16)) {
1150             for (i = 0; i < 64; i++) {
1151                 s->dct_error_sum[intra][i] >>= 1;
1152             }
1153             s->dct_count[intra] >>= 1;
1154         }
1155
1156         for (i = 0; i < 64; i++) {
1157             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1158                                        s->dct_count[intra] +
1159                                        s->dct_error_sum[intra][i] / 2) /
1160                                       (s->dct_error_sum[intra][i] + 1);
1161         }
1162     }
1163 }
1164
1165 /**
1166  * generic function for encode/decode called after coding/decoding
1167  * the header and before a frame is coded/decoded.
1168  */
1169 int ff_MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1170 {
1171     int i;
1172     Picture *pic;
1173     s->mb_skipped = 0;
1174
1175     assert(s->last_picture_ptr == NULL || s->out_format != FMT_H264 ||
1176            s->codec_id == AV_CODEC_ID_SVQ3);
1177
1178     /* mark & release old frames */
1179     if (s->out_format != FMT_H264 || s->codec_id == AV_CODEC_ID_SVQ3) {
1180         if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1181             s->last_picture_ptr != s->next_picture_ptr &&
1182             s->last_picture_ptr->f.data[0]) {
1183             if (s->last_picture_ptr->owner2 == s)
1184                 free_frame_buffer(s, s->last_picture_ptr);
1185         }
1186
1187         /* release forgotten pictures */
1188         /* if (mpeg124/h263) */
1189         if (!s->encoding) {
1190             for (i = 0; i < s->picture_count; i++) {
1191                 if (s->picture[i].owner2 == s && s->picture[i].f.data[0] &&
1192                     &s->picture[i] != s->last_picture_ptr &&
1193                     &s->picture[i] != s->next_picture_ptr &&
1194                     s->picture[i].f.reference) {
1195                     if (!(avctx->active_thread_type & FF_THREAD_FRAME))
1196                         av_log(avctx, AV_LOG_ERROR,
1197                                "releasing zombie picture\n");
1198                     free_frame_buffer(s, &s->picture[i]);
1199                 }
1200             }
1201         }
1202     }
1203
1204     if (!s->encoding) {
1205         ff_release_unused_pictures(s, 1);
1206
1207         if (s->current_picture_ptr &&
1208             s->current_picture_ptr->f.data[0] == NULL) {
 1209             // we already have an unused image
1210             // (maybe it was set before reading the header)
1211             pic = s->current_picture_ptr;
1212         } else {
1213             i   = ff_find_unused_picture(s, 0);
1214             pic = &s->picture[i];
1215         }
1216
1217         pic->f.reference = 0;
1218         if (!s->dropable) {
1219             if (s->codec_id == AV_CODEC_ID_H264)
1220                 pic->f.reference = s->picture_structure;
1221             else if (s->pict_type != AV_PICTURE_TYPE_B)
1222                 pic->f.reference = 3;
1223         }
1224
1225         pic->f.coded_picture_number = s->coded_picture_number++;
1226
1227         if (ff_alloc_picture(s, pic, 0) < 0)
1228             return -1;
1229
1230         s->current_picture_ptr = pic;
1231         // FIXME use only the vars from current_pic
1232         s->current_picture_ptr->f.top_field_first = s->top_field_first;
1233         if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
1234             s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1235             if (s->picture_structure != PICT_FRAME)
1236                 s->current_picture_ptr->f.top_field_first =
1237                     (s->picture_structure == PICT_TOP_FIELD) == s->first_field;
1238         }
1239         s->current_picture_ptr->f.interlaced_frame = !s->progressive_frame &&
1240                                                      !s->progressive_sequence;
1241         s->current_picture_ptr->field_picture      =  s->picture_structure != PICT_FRAME;
1242     }
1243
1244     s->current_picture_ptr->f.pict_type = s->pict_type;
1245     // if (s->flags && CODEC_FLAG_QSCALE)
1246     //     s->current_picture_ptr->quality = s->new_picture_ptr->quality;
1247     s->current_picture_ptr->f.key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1248
1249     ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1250
1251     if (s->pict_type != AV_PICTURE_TYPE_B) {
1252         s->last_picture_ptr = s->next_picture_ptr;
1253         if (!s->dropable)
1254             s->next_picture_ptr = s->current_picture_ptr;
1255     }
1256     /* av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n",
1257            s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1258            s->last_picture_ptr    ? s->last_picture_ptr->f.data[0]    : NULL,
1259            s->next_picture_ptr    ? s->next_picture_ptr->f.data[0]    : NULL,
1260            s->current_picture_ptr ? s->current_picture_ptr->f.data[0] : NULL,
1261            s->pict_type, s->dropable); */
1262
1263     if (s->codec_id != AV_CODEC_ID_H264) {
1264         if ((s->last_picture_ptr == NULL ||
1265              s->last_picture_ptr->f.data[0] == NULL) &&
1266             (s->pict_type != AV_PICTURE_TYPE_I ||
1267              s->picture_structure != PICT_FRAME)) {
1268             if (s->pict_type != AV_PICTURE_TYPE_I)
1269                 av_log(avctx, AV_LOG_ERROR,
1270                        "warning: first frame is no keyframe\n");
1271             else if (s->picture_structure != PICT_FRAME)
1272                 av_log(avctx, AV_LOG_INFO,
1273                        "allocate dummy last picture for field based first keyframe\n");
1274
1275             /* Allocate a dummy frame */
1276             i = ff_find_unused_picture(s, 0);
1277             s->last_picture_ptr = &s->picture[i];
1278             if (ff_alloc_picture(s, s->last_picture_ptr, 0) < 0) {
1279                 s->last_picture_ptr = NULL;
1280                 return -1;
1281             }
1282             ff_thread_report_progress(&s->last_picture_ptr->f, INT_MAX, 0);
1283             ff_thread_report_progress(&s->last_picture_ptr->f, INT_MAX, 1);
1284             s->last_picture_ptr->f.reference = 3;
1285         }
1286         if ((s->next_picture_ptr == NULL ||
1287              s->next_picture_ptr->f.data[0] == NULL) &&
1288             s->pict_type == AV_PICTURE_TYPE_B) {
1289             /* Allocate a dummy frame */
1290             i = ff_find_unused_picture(s, 0);
1291             s->next_picture_ptr = &s->picture[i];
1292             if (ff_alloc_picture(s, s->next_picture_ptr, 0) < 0) {
1293                 s->next_picture_ptr = NULL;
1294                 return -1;
1295             }
1296             ff_thread_report_progress(&s->next_picture_ptr->f, INT_MAX, 0);
1297             ff_thread_report_progress(&s->next_picture_ptr->f, INT_MAX, 1);
1298             s->next_picture_ptr->f.reference = 3;
1299         }
1300     }
1301
1302     if (s->last_picture_ptr)
1303         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
1304     if (s->next_picture_ptr)
1305         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
1306
1307     if (HAVE_THREADS && (avctx->active_thread_type & FF_THREAD_FRAME) &&
1308         (s->out_format != FMT_H264 || s->codec_id == AV_CODEC_ID_SVQ3)) {
1309         if (s->next_picture_ptr)
1310             s->next_picture_ptr->owner2 = s;
1311         if (s->last_picture_ptr)
1312             s->last_picture_ptr->owner2 = s;
1313     }
1314
1315     assert(s->pict_type == AV_PICTURE_TYPE_I || (s->last_picture_ptr &&
1316                                                  s->last_picture_ptr->f.data[0]));
1317
 1318     if (s->picture_structure != PICT_FRAME && s->out_format != FMT_H264) {
1319         int i;
1320         for (i = 0; i < 4; i++) {
1321             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1322                 s->current_picture.f.data[i] +=
1323                     s->current_picture.f.linesize[i];
1324             }
1325             s->current_picture.f.linesize[i] *= 2;
1326             s->last_picture.f.linesize[i]    *= 2;
1327             s->next_picture.f.linesize[i]    *= 2;
1328         }
1329     }
1330
1331     s->err_recognition = avctx->err_recognition;
1332
1333     /* set dequantizer, we can't do it during init as
1334      * it might change for mpeg4 and we can't do it in the header
1335      * decode as init is not called for mpeg4 there yet */
1336     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1337         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1338         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1339     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1340         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1341         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1342     } else {
1343         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1344         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1345     }
1346
1347     if (s->dct_error_sum) {
1348         assert(s->avctx->noise_reduction && s->encoding);
1349         update_noise_reduction(s);
1350     }
1351
1352     if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration)
1353         return ff_xvmc_field_start(s, avctx);
1354
1355     return 0;
1356 }
1357
1358 /* generic function for encode/decode called after a
1359  * frame has been coded/decoded. */
1360 void ff_MPV_frame_end(MpegEncContext *s)
1361 {
1362     int i;
1363     /* redraw edges for the frame if decoding didn't complete */
1364     // just to make sure that all data is rendered.
1365     if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration) {
1366         ff_xvmc_field_end(s);
 1367     } else if ((s->error_count || s->encoding) &&
1368               !s->avctx->hwaccel &&
1369               !(s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) &&
1370               s->unrestricted_mv &&
1371               s->current_picture.f.reference &&
1372               !s->intra_only &&
1373               !(s->flags & CODEC_FLAG_EMU_EDGE)) {
1374         int hshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_w;
1375         int vshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_h;
1376         s->dsp.draw_edges(s->current_picture.f.data[0], s->linesize,
1377                           s->h_edge_pos, s->v_edge_pos,
1378                           EDGE_WIDTH, EDGE_WIDTH,
1379                           EDGE_TOP | EDGE_BOTTOM);
1380         s->dsp.draw_edges(s->current_picture.f.data[1], s->uvlinesize,
1381                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1382                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1383                           EDGE_TOP | EDGE_BOTTOM);
1384         s->dsp.draw_edges(s->current_picture.f.data[2], s->uvlinesize,
1385                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1386                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1387                           EDGE_TOP | EDGE_BOTTOM);
1388     }
1389
1390     emms_c();
1391
1392     s->last_pict_type                 = s->pict_type;
1393     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1394     if (s->pict_type != AV_PICTURE_TYPE_B) {
1395         s->last_non_b_pict_type = s->pict_type;
1396     }
1397 #if 0
1398     /* copy back current_picture variables */
1399     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1400         if (s->picture[i].f.data[0] == s->current_picture.f.data[0]) {
1401             s->picture[i] = s->current_picture;
1402             break;
1403         }
1404     }
1405     assert(i < MAX_PICTURE_COUNT);
1406 #endif
1407
1408     if (s->encoding) {
1409         /* release non-reference frames */
1410         for (i = 0; i < s->picture_count; i++) {
1411             if (s->picture[i].f.data[0] && !s->picture[i].f.reference
1412                 /* && s->picture[i].type != FF_BUFFER_TYPE_SHARED */) {
1413                 free_frame_buffer(s, &s->picture[i]);
1414             }
1415         }
1416     }
1417     // clear copies, to avoid confusion
1418 #if 0
1419     memset(&s->last_picture,    0, sizeof(Picture));
1420     memset(&s->next_picture,    0, sizeof(Picture));
1421     memset(&s->current_picture, 0, sizeof(Picture));
1422 #endif
1423     s->avctx->coded_frame = &s->current_picture_ptr->f;
1424
1425     if (s->codec_id != AV_CODEC_ID_H264 && s->current_picture.f.reference) {
1426         ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 0);
1427     }
1428 }
1429
1430 /**
1431  * Draw a line from (ex, ey) -> (sx, sy).
1432  * @param w width of the image
1433  * @param h height of the image
1434  * @param stride stride/linesize of the image
1435  * @param color color of the line
1436  */
1437 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey,
1438                       int w, int h, int stride, int color)
1439 {
1440     int x, y, fr, f;
1441
1442     sx = av_clip(sx, 0, w - 1);
1443     sy = av_clip(sy, 0, h - 1);
1444     ex = av_clip(ex, 0, w - 1);
1445     ey = av_clip(ey, 0, h - 1);
1446
1447     buf[sy * stride + sx] += color;
1448
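    /* Rasterize with a 16.16 fixed-point DDA: step one pixel along the major
     * axis and use the fractional part of the minor coordinate to split the
     * color between the two neighbouring lines (or columns). */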
1449     if (FFABS(ex - sx) > FFABS(ey - sy)) {
1450         if (sx > ex) {
1451             FFSWAP(int, sx, ex);
1452             FFSWAP(int, sy, ey);
1453         }
1454         buf += sx + sy * stride;
1455         ex  -= sx;
1456         f    = ((ey - sy) << 16) / ex;
1457         for (x = 0; x <= ex; x++) {
1458             y  = (x * f) >> 16;
1459             fr = (x * f) & 0xFFFF;
1460             buf[y * stride + x]       += (color * (0x10000 - fr)) >> 16;
1461             buf[(y + 1) * stride + x] += (color *            fr ) >> 16;
1462         }
1463     } else {
1464         if (sy > ey) {
1465             FFSWAP(int, sx, ex);
1466             FFSWAP(int, sy, ey);
1467         }
1468         buf += sx + sy * stride;
1469         ey  -= sy;
1470         if (ey)
1471             f  = ((ex - sx) << 16) / ey;
1472         else
1473             f = 0;
1474         for (y = 0; y <= ey; y++) {
1475             x  = (y * f) >> 16;
1476             fr = (y * f) & 0xFFFF;
1477             buf[y * stride + x]     += (color * (0x10000 - fr)) >> 16;
1478             buf[y * stride + x + 1] += (color *            fr ) >> 16;
1479         }
1480     }
1481 }
1482
1483 /**
1484  * Draw an arrow from (ex, ey) -> (sx, sy).
1485  * @param w width of the image
1486  * @param h height of the image
1487  * @param stride stride/linesize of the image
1488  * @param color color of the arrow
1489  */
1490 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex,
1491                        int ey, int w, int h, int stride, int color)
1492 {
1493     int dx,dy;
1494
1495     sx = av_clip(sx, -100, w + 100);
1496     sy = av_clip(sy, -100, h + 100);
1497     ex = av_clip(ex, -100, w + 100);
1498     ey = av_clip(ey, -100, h + 100);
1499
1500     dx = ex - sx;
1501     dy = ey - sy;
1502
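    /* Draw the arrow head at (sx, sy) as two short strokes at roughly +-45
     * degrees to the shaft: (rx, ry) is (dx, dy) rotated by 45 degrees and
     * scaled down to about 3 pixels; the second stroke is (rx, ry) rotated
     * by a further 90 degrees. */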
1503     if (dx * dx + dy * dy > 3 * 3) {
1504         int rx =  dx + dy;
1505         int ry = -dx + dy;
1506         int length = ff_sqrt((rx * rx + ry * ry) << 8);
1507
1508         // FIXME subpixel accuracy
1509         rx = ROUNDED_DIV(rx * 3 << 4, length);
1510         ry = ROUNDED_DIV(ry * 3 << 4, length);
1511
1512         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1513         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1514     }
1515     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1516 }
1517
1518 /**
1519  * Print debugging info for the given picture.
1520  */
1521 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict)
1522 {
1523     if (s->avctx->hwaccel || !pict || !pict->mb_type)
1524         return;
1525
1526     if (s->avctx->debug & (FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)) {
1527         int x,y;
1528
1529         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1530         switch (pict->pict_type) {
1531         case AV_PICTURE_TYPE_I:
1532             av_log(s->avctx,AV_LOG_DEBUG,"I\n");
1533             break;
1534         case AV_PICTURE_TYPE_P:
1535             av_log(s->avctx,AV_LOG_DEBUG,"P\n");
1536             break;
1537         case AV_PICTURE_TYPE_B:
1538             av_log(s->avctx,AV_LOG_DEBUG,"B\n");
1539             break;
1540         case AV_PICTURE_TYPE_S:
1541             av_log(s->avctx,AV_LOG_DEBUG,"S\n");
1542             break;
1543         case AV_PICTURE_TYPE_SI:
1544             av_log(s->avctx,AV_LOG_DEBUG,"SI\n");
1545             break;
1546         case AV_PICTURE_TYPE_SP:
1547             av_log(s->avctx,AV_LOG_DEBUG,"SP\n");
1548             break;
1549         }
1550         for (y = 0; y < s->mb_height; y++) {
1551             for (x = 0; x < s->mb_width; x++) {
1552                 if (s->avctx->debug & FF_DEBUG_SKIP) {
1553                     int count = s->mbskip_table[x + y * s->mb_stride];
1554                     if (count > 9)
1555                         count = 9;
1556                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1557                 }
1558                 if (s->avctx->debug & FF_DEBUG_QP) {
1559                     av_log(s->avctx, AV_LOG_DEBUG, "%2d",
1560                            pict->qscale_table[x + y * s->mb_stride]);
1561                 }
1562                 if (s->avctx->debug & FF_DEBUG_MB_TYPE) {
1563                     int mb_type = pict->mb_type[x + y * s->mb_stride];
1564                     // Type & MV direction
1565                     if (IS_PCM(mb_type))
1566                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1567                     else if (IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1568                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1569                     else if (IS_INTRA4x4(mb_type))
1570                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1571                     else if (IS_INTRA16x16(mb_type))
1572                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1573                     else if (IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1574                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1575                     else if (IS_DIRECT(mb_type))
1576                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1577                     else if (IS_GMC(mb_type) && IS_SKIP(mb_type))
1578                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1579                     else if (IS_GMC(mb_type))
1580                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1581                     else if (IS_SKIP(mb_type))
1582                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1583                     else if (!USES_LIST(mb_type, 1))
1584                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1585                     else if (!USES_LIST(mb_type, 0))
1586                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1587                     else {
1588                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1589                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1590                     }
1591
1592                     // segmentation
1593                     if (IS_8X8(mb_type))
1594                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1595                     else if (IS_16X8(mb_type))
1596                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1597                     else if (IS_8X16(mb_type))
1598                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1599                     else if (IS_INTRA(mb_type) || IS_16X16(mb_type))
1600                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1601                     else
1602                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1603
1604
1605                     if (IS_INTERLACED(mb_type))
1606                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1607                     else
1608                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1609                 }
1610                 // av_log(s->avctx, AV_LOG_DEBUG, " ");
1611             }
1612             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1613         }
1614     }
1615
1616     if ((s->avctx->debug & (FF_DEBUG_VIS_QP | FF_DEBUG_VIS_MB_TYPE)) ||
1617         (s->avctx->debug_mv)) {
1618         const int shift = 1 + s->quarter_sample;
1619         int mb_y;
1620         uint8_t *ptr;
1621         int i;
1622         int h_chroma_shift, v_chroma_shift, block_height;
1623         const int width          = s->avctx->width;
1624         const int height         = s->avctx->height;
1625         const int mv_sample_log2 = 4 - pict->motion_subsample_log2;
1626         const int mv_stride      = (s->mb_width << mv_sample_log2) +
1627                                    (s->codec_id == AV_CODEC_ID_H264 ? 0 : 1);
1628         s->low_delay = 0; // needed to see the vectors without trashing the buffers
1629
1630         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,
1631                                       &h_chroma_shift, &v_chroma_shift);
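        /* Draw the debug overlays into a private copy of the frame so the
         * reference planes used for prediction are left untouched. */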
1632         for (i = 0; i < 3; i++) {
1633             memcpy(s->visualization_buffer[i], pict->data[i],
1634                    (i == 0) ? pict->linesize[i] * height:
1635                               pict->linesize[i] * height >> v_chroma_shift);
1636             pict->data[i] = s->visualization_buffer[i];
1637         }
1638         pict->type   = FF_BUFFER_TYPE_COPY;
1639         ptr          = pict->data[0];
1640         block_height = 16 >> v_chroma_shift;
1641
1642         for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1643             int mb_x;
1644             for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1645                 const int mb_index = mb_x + mb_y * s->mb_stride;
1646                 if ((s->avctx->debug_mv) && pict->motion_val) {
1647                     int type;
1648                     for (type = 0; type < 3; type++) {
1649                         int direction = 0;
1650                         switch (type) {
1651                         case 0:
1652                             if ((!(s->avctx->debug_mv & FF_DEBUG_VIS_MV_P_FOR)) ||
1653                                 (pict->pict_type != AV_PICTURE_TYPE_P))
1654                                 continue;
1655                             direction = 0;
1656                             break;
1657                         case 1:
1658                             if ((!(s->avctx->debug_mv & FF_DEBUG_VIS_MV_B_FOR)) ||
1659                                 (pict->pict_type != AV_PICTURE_TYPE_B))
1660                                 continue;
1661                             direction = 0;
1662                             break;
1663                         case 2:
1664                             if ((!(s->avctx->debug_mv & FF_DEBUG_VIS_MV_B_BACK)) ||
1665                                 (pict->pict_type != AV_PICTURE_TYPE_B))
1666                                 continue;
1667                             direction = 1;
1668                             break;
1669                         }
1670                         if (!USES_LIST(pict->mb_type[mb_index], direction))
1671                             continue;
1672
1673                         if (IS_8X8(pict->mb_type[mb_index])) {
1674                             int i;
1675                             for (i = 0; i < 4; i++) {
1676                                 int sx = mb_x * 16 + 4 + 8 * (i & 1);
1677                                 int sy = mb_y * 16 + 4 + 8 * (i >> 1);
1678                                 int xy = (mb_x * 2 + (i & 1) +
1679                                           (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
1680                                 int mx = (pict->motion_val[direction][xy][0] >> shift) + sx;
1681                                 int my = (pict->motion_val[direction][xy][1] >> shift) + sy;
1682                                 draw_arrow(ptr, sx, sy, mx, my, width,
1683                                            height, s->linesize, 100);
1684                             }
1685                         } else if (IS_16X8(pict->mb_type[mb_index])) {
1686                             int i;
1687                             for (i = 0; i < 2; i++) {
1688                                 int sx = mb_x * 16 + 8;
1689                                 int sy = mb_y * 16 + 4 + 8 * i;
1690                                 int xy = (mb_x * 2 + (mb_y * 2 + i) * mv_stride) << (mv_sample_log2 - 1);
1691                                 int mx = (pict->motion_val[direction][xy][0] >> shift);
1692                                 int my = (pict->motion_val[direction][xy][1] >> shift);
1693
1694                                 if (IS_INTERLACED(pict->mb_type[mb_index]))
1695                                     my *= 2;
1696
1697                                 draw_arrow(ptr, sx, sy, mx + sx, my + sy, width,
1698                                            height, s->linesize, 100);
1699                             }
1700                         } else if (IS_8X16(pict->mb_type[mb_index])) {
1701                             int i;
1702                             for (i = 0; i < 2; i++) {
1703                                 int sx = mb_x * 16 + 4 + 8 * i;
1704                                 int sy = mb_y * 16 + 8;
1705                                 int xy = (mb_x * 2 + i + mb_y * 2 * mv_stride) << (mv_sample_log2 - 1);
1706                                 int mx = pict->motion_val[direction][xy][0] >> shift;
1707                                 int my = pict->motion_val[direction][xy][1] >> shift;
1708
1709                                 if (IS_INTERLACED(pict->mb_type[mb_index]))
1710                                     my *= 2;
1711
1712                                 draw_arrow(ptr, sx, sy, mx + sx, my + sy, width,
1713                                            height, s->linesize, 100);
1714                             }
1715                         } else {
1716                               int sx = mb_x * 16 + 8;
1717                               int sy = mb_y * 16 + 8;
1718                               int xy = (mb_x + mb_y * mv_stride) << mv_sample_log2;
1719                               int mx = (pict->motion_val[direction][xy][0] >> shift) + sx;
1720                               int my = (pict->motion_val[direction][xy][1] >> shift) + sy;
1721                               draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1722                         }
1723                     }
1724                 }
1725                 if ((s->avctx->debug & FF_DEBUG_VIS_QP) && pict->motion_val) {
1726                     uint64_t c = (pict->qscale_table[mb_index] * 128 / 31) *
1727                                  0x0101010101010101ULL;
1728                     int y;
1729                     for (y = 0; y < block_height; y++) {
1730                         *(uint64_t *)(pict->data[1] + 8 * mb_x +
1731                                       (block_height * mb_y + y) *
1732                                       pict->linesize[1]) = c;
1733                         *(uint64_t *)(pict->data[2] + 8 * mb_x +
1734                                       (block_height * mb_y + y) *
1735                                       pict->linesize[2]) = c;
1736                     }
1737                 }
1738                 if ((s->avctx->debug & FF_DEBUG_VIS_MB_TYPE) &&
1739                     pict->motion_val) {
1740                     int mb_type = pict->mb_type[mb_index];
1741                     uint64_t u,v;
1742                     int y;
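/* COLOR(theta, r) picks a chroma pair on a circle of radius r around
 * neutral grey (u = v = 128); theta is the hue angle in degrees, giving
 * each macroblock type below its own color in the chroma planes. */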
1743 #define COLOR(theta, r) \
1744     u = (int)(128 + r * cos(theta * 3.141592 / 180)); \
1745     v = (int)(128 + r * sin(theta * 3.141592 / 180));
1746
1747
1748                     u = v = 128;
1749                     if (IS_PCM(mb_type)) {
1750                         COLOR(120, 48)
1751                     } else if ((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) ||
1752                                IS_INTRA16x16(mb_type)) {
1753                         COLOR(30, 48)
1754                     } else if (IS_INTRA4x4(mb_type)) {
1755                         COLOR(90, 48)
1756                     } else if (IS_DIRECT(mb_type) && IS_SKIP(mb_type)) {
1757                         // COLOR(120, 48)
1758                     } else if (IS_DIRECT(mb_type)) {
1759                         COLOR(150, 48)
1760                     } else if (IS_GMC(mb_type) && IS_SKIP(mb_type)) {
1761                         COLOR(170, 48)
1762                     } else if (IS_GMC(mb_type)) {
1763                         COLOR(190, 48)
1764                     } else if (IS_SKIP(mb_type)) {
1765                         // COLOR(180, 48)
1766                     } else if (!USES_LIST(mb_type, 1)) {
1767                         COLOR(240, 48)
1768                     } else if (!USES_LIST(mb_type, 0)) {
1769                         COLOR(0, 48)
1770                     } else {
1771                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1772                         COLOR(300,48)
1773                     }
1774
1775                     u *= 0x0101010101010101ULL;
1776                     v *= 0x0101010101010101ULL;
1777                     for (y = 0; y < block_height; y++) {
1778                         *(uint64_t *)(pict->data[1] + 8 * mb_x +
1779                                       (block_height * mb_y + y) * pict->linesize[1]) = u;
1780                         *(uint64_t *)(pict->data[2] + 8 * mb_x +
1781                                       (block_height * mb_y + y) * pict->linesize[2]) = v;
1782                     }
1783
1784                     // segmentation
1785                     if (IS_8X8(mb_type) || IS_16X8(mb_type)) {
1786                         *(uint64_t *)(pict->data[0] + 16 * mb_x + 0 +
1787                                       (16 * mb_y + 8) * pict->linesize[0]) ^= 0x8080808080808080ULL;
1788                         *(uint64_t *)(pict->data[0] + 16 * mb_x + 8 +
1789                                       (16 * mb_y + 8) * pict->linesize[0]) ^= 0x8080808080808080ULL;
1790                     }
1791                     if (IS_8X8(mb_type) || IS_8X16(mb_type)) {
1792                         for (y = 0; y < 16; y++)
1793                             pict->data[0][16 * mb_x + 8 + (16 * mb_y + y) *
1794                                           pict->linesize[0]] ^= 0x80;
1795                     }
1796                     if (IS_8X8(mb_type) && mv_sample_log2 >= 2) {
1797                         int dm = 1 << (mv_sample_log2 - 2);
1798                         for (i = 0; i < 4; i++) {
1799                             int sx = mb_x * 16 + 8 * (i & 1);
1800                             int sy = mb_y * 16 + 8 * (i >> 1);
1801                             int xy = (mb_x * 2 + (i & 1) +
1802                                      (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
1803                             // FIXME bidir
1804                             int32_t *mv = (int32_t *) &pict->motion_val[0][xy];
1805                             if (mv[0] != mv[dm] ||
1806                                 mv[dm * mv_stride] != mv[dm * (mv_stride + 1)])
1807                                 for (y = 0; y < 8; y++)
1808                                     pict->data[0][sx + 4 + (sy + y) * pict->linesize[0]] ^= 0x80;
1809                             if (mv[0] != mv[dm * mv_stride] || mv[dm] != mv[dm * (mv_stride + 1)])
1810                                 *(uint64_t *)(pict->data[0] + sx + (sy + 4) *
1811                                               pict->linesize[0]) ^= 0x8080808080808080ULL;
1812                         }
1813                     }
1814
1815                     if (IS_INTERLACED(mb_type) &&
1816                         s->codec_id == AV_CODEC_ID_H264) {
1817                         // hmm
1818                     }
1819                 }
1820                 s->mbskip_table[mb_index] = 0;
1821             }
1822         }
1823     }
1824 }
1825
1826 /**
1827  * find the lowest MB row referenced in the MVs
1828  */
1829 int ff_MPV_lowest_referenced_row(MpegEncContext *s, int dir)
1830 {
1831     int my_max = INT_MIN, my_min = INT_MAX, qpel_shift = !s->quarter_sample;
1832     int my, off, i, mvs;
1833
1834     if (s->picture_structure != PICT_FRAME) goto unhandled;
1835
1836     switch (s->mv_type) {
1837         case MV_TYPE_16X16:
1838             mvs = 1;
1839             break;
1840         case MV_TYPE_16X8:
1841             mvs = 2;
1842             break;
1843         case MV_TYPE_8X8:
1844             mvs = 4;
1845             break;
1846         default:
1847             goto unhandled;
1848     }
1849
1850     for (i = 0; i < mvs; i++) {
1851         my = s->mv[dir][i][1]<<qpel_shift;
1852         my_max = FFMAX(my_max, my);
1853         my_min = FFMIN(my_min, my);
1854     }
1855
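    /* my is in quarter-pel units here; 64 quarter-pel steps make one
     * 16-pixel MB row, so round the largest displacement up to MB rows. */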
1856     off = (FFMAX(-my_min, my_max) + 63) >> 6;
1857
1858     return FFMIN(FFMAX(s->mb_y + off, 0), s->mb_height-1);
1859 unhandled:
1860     return s->mb_height-1;
1861 }
1862
1863 /* put block[] to dest[] */
1864 static inline void put_dct(MpegEncContext *s,
1865                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1866 {
1867     s->dct_unquantize_intra(s, block, i, qscale);
1868     s->dsp.idct_put (dest, line_size, block);
1869 }
1870
1871 /* add block[] to dest[] */
1872 static inline void add_dct(MpegEncContext *s,
1873                            DCTELEM *block, int i, uint8_t *dest, int line_size)
1874 {
1875     if (s->block_last_index[i] >= 0) {
1876         s->dsp.idct_add (dest, line_size, block);
1877     }
1878 }
1879
1880 static inline void add_dequant_dct(MpegEncContext *s,
1881                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1882 {
1883     if (s->block_last_index[i] >= 0) {
1884         s->dct_unquantize_inter(s, block, i, qscale);
1885
1886         s->dsp.idct_add (dest, line_size, block);
1887     }
1888 }
1889
1890 /**
1891  * Clean dc, ac, coded_block for the current non-intra MB.
1892  */
1893 void ff_clean_intra_table_entries(MpegEncContext *s)
1894 {
1895     int wrap = s->b8_stride;
1896     int xy = s->block_index[0];
1897
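    /* Reset the luma DC predictors of the four 8x8 blocks to the neutral
     * value 1024 (128 << 3). */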
1898     s->dc_val[0][xy           ] =
1899     s->dc_val[0][xy + 1       ] =
1900     s->dc_val[0][xy     + wrap] =
1901     s->dc_val[0][xy + 1 + wrap] = 1024;
1902     /* ac pred */
1903     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
1904     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
1905     if (s->msmpeg4_version>=3) {
1906         s->coded_block[xy           ] =
1907         s->coded_block[xy + 1       ] =
1908         s->coded_block[xy     + wrap] =
1909         s->coded_block[xy + 1 + wrap] = 0;
1910     }
1911     /* chroma */
1912     wrap = s->mb_stride;
1913     xy = s->mb_x + s->mb_y * wrap;
1914     s->dc_val[1][xy] =
1915     s->dc_val[2][xy] = 1024;
1916     /* ac pred */
1917     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
1918     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
1919
1920     s->mbintra_table[xy]= 0;
1921 }
1922
1923 /* generic function called after a macroblock has been parsed by the
1924    decoder or after it has been encoded by the encoder.
1925
1926    Important variables used:
1927    s->mb_intra : true if intra macroblock
1928    s->mv_dir   : motion vector direction
1929    s->mv_type  : motion vector type
1930    s->mv       : motion vector
1931    s->interlaced_dct : true if interlaced dct used (mpeg2)
1932  */
1933 static av_always_inline
1934 void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
1935                             int is_mpeg12)
1936 {
1937     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
1938     if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){
1939         ff_xvmc_decode_mb(s);//xvmc uses pblocks
1940         return;
1941     }
1942
1943     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
1944        /* save DCT coefficients */
1945        int i,j;
1946        DCTELEM *dct = &s->current_picture.f.dct_coeff[mb_xy * 64 * 6];
1947        av_log(s->avctx, AV_LOG_DEBUG, "DCT coeffs of MB at %dx%d:\n", s->mb_x, s->mb_y);
1948        for(i=0; i<6; i++){
1949            for(j=0; j<64; j++){
1950                *dct++ = block[i][s->dsp.idct_permutation[j]];
1951                av_log(s->avctx, AV_LOG_DEBUG, "%5d", dct[-1]);
1952            }
1953            av_log(s->avctx, AV_LOG_DEBUG, "\n");
1954        }
1955     }
1956
1957     s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1958
1959     /* update DC predictors for P macroblocks */
1960     if (!s->mb_intra) {
1961         if (!is_mpeg12 && (s->h263_pred || s->h263_aic)) {
1962             if(s->mbintra_table[mb_xy])
1963                 ff_clean_intra_table_entries(s);
1964         } else {
1965             s->last_dc[0] =
1966             s->last_dc[1] =
1967             s->last_dc[2] = 128 << s->intra_dc_precision;
1968         }
1969     }
1970     else if (!is_mpeg12 && (s->h263_pred || s->h263_aic))
1971         s->mbintra_table[mb_xy]=1;
1972
1973     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==AV_PICTURE_TYPE_B) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
1974         uint8_t *dest_y, *dest_cb, *dest_cr;
1975         int dct_linesize, dct_offset;
1976         op_pixels_func (*op_pix)[4];
1977         qpel_mc_func (*op_qpix)[16];
1978         const int linesize   = s->current_picture.f.linesize[0]; //not s->linesize as this would be wrong for field pics
1979         const int uvlinesize = s->current_picture.f.linesize[1];
1980         const int readable= s->pict_type != AV_PICTURE_TYPE_B || s->encoding || s->avctx->draw_horiz_band;
1981         const int block_size = 8;
1982
1983         /* avoid copy if macroblock skipped in last frame too */
1984         /* skip only during decoding as we might trash the buffers during encoding a bit */
1985         if(!s->encoding){
1986             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
1987
1988             if (s->mb_skipped) {
1989                 s->mb_skipped= 0;
1990                 assert(s->pict_type!=AV_PICTURE_TYPE_I);
1991                 *mbskip_ptr = 1;
1992             } else if(!s->current_picture.f.reference) {
1993                 *mbskip_ptr = 1;
1994             } else{
1995                 *mbskip_ptr = 0; /* not skipped */
1996             }
1997         }
1998
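        /* With interlaced DCT each block covers every other line, so the
         * block stride doubles and the lower pair of luma blocks starts one
         * line down instead of block_size lines down. */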
1999         dct_linesize = linesize << s->interlaced_dct;
2000         dct_offset   = s->interlaced_dct ? linesize : linesize * block_size;
2001
2002         if(readable){
2003             dest_y=  s->dest[0];
2004             dest_cb= s->dest[1];
2005             dest_cr= s->dest[2];
2006         }else{
2007             dest_y = s->b_scratchpad;
2008             dest_cb= s->b_scratchpad+16*linesize;
2009             dest_cr= s->b_scratchpad+32*linesize;
2010         }
2011
2012         if (!s->mb_intra) {
2013             /* motion handling */
2014             /* decoding or more than one mb_type (MC was already done otherwise) */
2015             if(!s->encoding){
2016
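                /* With frame-level threading, wait until the reference
                 * frames have been decoded at least down to the lowest row
                 * this MB's motion vectors can reference. */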
2017                 if(HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) {
2018                     if (s->mv_dir & MV_DIR_FORWARD) {
2019                         ff_thread_await_progress(&s->last_picture_ptr->f,
2020                                                  ff_MPV_lowest_referenced_row(s, 0),
2021                                                  0);
2022                     }
2023                     if (s->mv_dir & MV_DIR_BACKWARD) {
2024                         ff_thread_await_progress(&s->next_picture_ptr->f,
2025                                                  ff_MPV_lowest_referenced_row(s, 1),
2026                                                  0);
2027                     }
2028                 }
2029
2030                 op_qpix= s->me.qpel_put;
2031                 if ((!s->no_rounding) || s->pict_type==AV_PICTURE_TYPE_B){
2032                     op_pix = s->dsp.put_pixels_tab;
2033                 }else{
2034                     op_pix = s->dsp.put_no_rnd_pixels_tab;
2035                 }
2036                 if (s->mv_dir & MV_DIR_FORWARD) {
2037                     ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data, op_pix, op_qpix);
2038                     op_pix = s->dsp.avg_pixels_tab;
2039                     op_qpix= s->me.qpel_avg;
2040                 }
2041                 if (s->mv_dir & MV_DIR_BACKWARD) {
2042                     ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data, op_pix, op_qpix);
2043                 }
2044             }
2045
2046             /* skip dequant / idct if we are really late ;) */
2047             if(s->avctx->skip_idct){
2048                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B)
2049                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != AV_PICTURE_TYPE_I)
2050                    || s->avctx->skip_idct >= AVDISCARD_ALL)
2051                     goto skip_idct;
2052             }
2053
2054             /* add dct residue */
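            /* For MSMPEG4, MPEG-1/2 and MPEG-4 without mpeg_quant the
             * coefficients are already dequantized while the bitstream is
             * parsed, so only the inverse transform is added; the remaining
             * codecs (and the encoder, which needs the reconstruction)
             * dequantize here first. */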
2055             if(s->encoding || !(   s->msmpeg4_version || s->codec_id==AV_CODEC_ID_MPEG1VIDEO || s->codec_id==AV_CODEC_ID_MPEG2VIDEO
2056                                 || (s->codec_id==AV_CODEC_ID_MPEG4 && !s->mpeg_quant))){
2057                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
2058                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
2059                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
2060                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
2061
2062                 if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2063                     if (s->chroma_y_shift){
2064                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
2065                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
2066                     }else{
2067                         dct_linesize >>= 1;
2068                         dct_offset >>=1;
2069                         add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
2070                         add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
2071                         add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
2072                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
2073                     }
2074                 }
2075             } else if(is_mpeg12 || (s->codec_id != AV_CODEC_ID_WMV2)){
2076                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
2077                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
2078                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
2079                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
2080
2081                 if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2082                     if(s->chroma_y_shift){//Chroma420
2083                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
2084                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
2085                     }else{
2086                         //chroma422
2087                         dct_linesize = uvlinesize << s->interlaced_dct;
2088                         dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize * 8;
2089
2090                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
2091                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
2092                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
2093                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
2094                         if(!s->chroma_x_shift){//Chroma444
2095                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
2096                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
2097                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
2098                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
2099                         }
2100                     }
2101                 }//fi gray
2102             }
2103             else if (CONFIG_WMV2_DECODER || CONFIG_WMV2_ENCODER) {
2104                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
2105             }
2106         } else {
2107             /* dct only in intra block */
2108             if(s->encoding || !(s->codec_id==AV_CODEC_ID_MPEG1VIDEO || s->codec_id==AV_CODEC_ID_MPEG2VIDEO)){
2109                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
2110                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
2111                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
2112                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
2113
2114                 if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2115                     if(s->chroma_y_shift){
2116                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
2117                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
2118                     }else{
2119                         dct_offset >>=1;
2120                         dct_linesize >>=1;
2121                         put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
2122                         put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
2123                         put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
2124                         put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
2125                     }
2126                 }
2127             }else{
2128                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
2129                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
2130                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
2131                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
2132
2133                 if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2134                     if(s->chroma_y_shift){
2135                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
2136                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
2137                     }else{
2138
2139                         dct_linesize = uvlinesize << s->interlaced_dct;
2140                         dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize * 8;
2141
2142                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
2143                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
2144                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
2145                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
2146                         if(!s->chroma_x_shift){//Chroma444
2147                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
2148                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
2149                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
2150                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
2151                         }
2152                     }
2153                 }//gray
2154             }
2155         }
2156 skip_idct:
2157         if(!readable){
2158             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
2159             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
2160             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
2161         }
2162     }
2163 }
2164
2165 void ff_MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
2166 #if !CONFIG_SMALL
2167     if(s->out_format == FMT_MPEG1) {
2168         MPV_decode_mb_internal(s, block, 1);
2169     } else
2170 #endif
2171         MPV_decode_mb_internal(s, block, 0);
2172 }
2173
2174 /**
2175  * @param h is the normal height, this will be reduced automatically if needed for the last row
2176  */
2177 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2178     const int field_pic= s->picture_structure != PICT_FRAME;
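    /* Field pictures cover only every second frame line, so convert the band
     * position and height to frame coordinates. */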
2179     if(field_pic){
2180         h <<= 1;
2181         y <<= 1;
2182     }
2183
2184     if (!s->avctx->hwaccel
2185        && !(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2186        && s->unrestricted_mv
2187        && s->current_picture.f.reference
2188        && !s->intra_only
2189        && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
2190         int sides = 0, edge_h;
2191         int hshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_w;
2192         int vshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_h;
2193         if (y==0) sides |= EDGE_TOP;
2194         if (y + h >= s->v_edge_pos) sides |= EDGE_BOTTOM;
2195
2196         edge_h= FFMIN(h, s->v_edge_pos - y);
2197
2198         s->dsp.draw_edges(s->current_picture_ptr->f.data[0] +  y         *s->linesize,
2199                           s->linesize,           s->h_edge_pos,         edge_h,
2200                           EDGE_WIDTH,            EDGE_WIDTH,            sides);
2201         s->dsp.draw_edges(s->current_picture_ptr->f.data[1] + (y>>vshift)*s->uvlinesize,
2202                           s->uvlinesize,         s->h_edge_pos>>hshift, edge_h>>vshift,
2203                           EDGE_WIDTH>>hshift,    EDGE_WIDTH>>vshift,    sides);
2204         s->dsp.draw_edges(s->current_picture_ptr->f.data[2] + (y>>vshift)*s->uvlinesize,
2205                           s->uvlinesize,         s->h_edge_pos>>hshift, edge_h>>vshift,
2206                           EDGE_WIDTH>>hshift,    EDGE_WIDTH>>vshift,    sides);
2207     }
2208
2209     h= FFMIN(h, s->avctx->height - y);
2210
2211     if(field_pic && s->first_field && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
2212
2213     if (s->avctx->draw_horiz_band) {
2214         AVFrame *src;
2215         int offset[AV_NUM_DATA_POINTERS];
2216         int i;
2217
2218         if(s->pict_type==AV_PICTURE_TYPE_B || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
2219             src = &s->current_picture_ptr->f;
2220         else if(s->last_picture_ptr)
2221             src = &s->last_picture_ptr->f;
2222         else
2223             return;
2224
2225         if(s->pict_type==AV_PICTURE_TYPE_B && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2226             for (i = 0; i < AV_NUM_DATA_POINTERS; i++)
2227                 offset[i] = 0;
2228         }else{
2229             offset[0]= y * s->linesize;
2230             offset[1]=
2231             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
2232             for (i = 3; i < AV_NUM_DATA_POINTERS; i++)
2233                 offset[i] = 0;
2234         }
2235
2236         emms_c();
2237
2238         s->avctx->draw_horiz_band(s->avctx, src, offset,
2239                                   y, s->picture_structure, h);
2240     }
2241 }
2242
2243 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
2244     const int linesize   = s->current_picture.f.linesize[0]; //not s->linesize as this would be wrong for field pics
2245     const int uvlinesize = s->current_picture.f.linesize[1];
2246     const int mb_size= 4;
2247
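    /* block_index[0..3] address the four luma 8x8 blocks in b8_stride units;
     * [4] and [5] address the Cb and Cr blocks, which are stored after the
     * luma area in the shared per-block arrays (hence the
     * s->b8_stride * s->mb_height * 2 offset). */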
2248     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
2249     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
2250     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
2251     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
2252     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2253     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2254     //block_index is not used by mpeg2, so it is not affected by chroma_format
2255
2256     s->dest[0] = s->current_picture.f.data[0] + ((s->mb_x - 1) <<  mb_size);
2257     s->dest[1] = s->current_picture.f.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2258     s->dest[2] = s->current_picture.f.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2259
2260     if(!(s->pict_type==AV_PICTURE_TYPE_B && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
2261     {
2262         if(s->picture_structure==PICT_FRAME){
2263             s->dest[0] += s->mb_y *   linesize << mb_size;
2264             s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2265             s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2266         }else{
2267             s->dest[0] += (s->mb_y>>1) *   linesize << mb_size;
2268             s->dest[1] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
2269             s->dest[2] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
2270             assert((s->mb_y&1) == (s->picture_structure == PICT_BOTTOM_FIELD));
2271         }
2272     }
2273 }
2274
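/* Release all pictures and reset the parsing state; called when the decoder
 * is flushed, e.g. after a seek. */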
2275 void ff_mpeg_flush(AVCodecContext *avctx){
2276     int i;
2277     MpegEncContext *s = avctx->priv_data;
2278
2279     if(s==NULL || s->picture==NULL)
2280         return;
2281
2282     for(i=0; i<s->picture_count; i++){
2283        if (s->picture[i].f.data[0] &&
2284            (s->picture[i].f.type == FF_BUFFER_TYPE_INTERNAL ||
2285             s->picture[i].f.type == FF_BUFFER_TYPE_USER))
2286         free_frame_buffer(s, &s->picture[i]);
2287     }
2288     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
2289
2290     s->mb_x= s->mb_y= 0;
2291
2292     s->parse_context.state= -1;
2293     s->parse_context.frame_start_found= 0;
2294     s->parse_context.overread= 0;
2295     s->parse_context.overread_index= 0;
2296     s->parse_context.index= 0;
2297     s->parse_context.last_index= 0;
2298     s->bitstream_buffer_size=0;
2299     s->pp_time=0;
2300 }
2301
2302 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
2303                                    DCTELEM *block, int n, int qscale)
2304 {
2305     int i, level, nCoeffs;
2306     const uint16_t *quant_matrix;
2307
2308     nCoeffs= s->block_last_index[n];
2309
2310     if (n < 4)
2311         block[0] = block[0] * s->y_dc_scale;
2312     else
2313         block[0] = block[0] * s->c_dc_scale;
2314     /* XXX: only mpeg1 */
2315     quant_matrix = s->intra_matrix;
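    /* MPEG-1 mismatch control: (level - 1) | 1 forces each dequantized
     * coefficient to an odd value. */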
2316     for(i=1;i<=nCoeffs;i++) {
2317         int j= s->intra_scantable.permutated[i];
2318         level = block[j];
2319         if (level) {
2320             if (level < 0) {
2321                 level = -level;
2322                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2323                 level = (level - 1) | 1;
2324                 level = -level;
2325             } else {
2326                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2327                 level = (level - 1) | 1;
2328             }
2329             block[j] = level;
2330         }
2331     }
2332 }
2333
2334 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
2335                                    DCTELEM *block, int n, int qscale)
2336 {
2337     int i, level, nCoeffs;
2338     const uint16_t *quant_matrix;
2339
2340     nCoeffs= s->block_last_index[n];
2341
2342     quant_matrix = s->inter_matrix;
2343     for(i=0; i<=nCoeffs; i++) {
2344         int j= s->intra_scantable.permutated[i];
2345         level = block[j];
2346         if (level) {
2347             if (level < 0) {
2348                 level = -level;
2349                 level = (((level << 1) + 1) * qscale *
2350                          ((int) (quant_matrix[j]))) >> 4;
2351                 level = (level - 1) | 1;
2352                 level = -level;
2353             } else {
2354                 level = (((level << 1) + 1) * qscale *
2355                          ((int) (quant_matrix[j]))) >> 4;
2356                 level = (level - 1) | 1;
2357             }
2358             block[j] = level;
2359         }
2360     }
2361 }
2362
2363 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
2364                                    DCTELEM *block, int n, int qscale)
2365 {
2366     int i, level, nCoeffs;
2367     const uint16_t *quant_matrix;
2368
2369     if(s->alternate_scan) nCoeffs= 63;
2370     else nCoeffs= s->block_last_index[n];
2371
2372     if (n < 4)
2373         block[0] = block[0] * s->y_dc_scale;
2374     else
2375         block[0] = block[0] * s->c_dc_scale;
2376     quant_matrix = s->intra_matrix;
2377     for(i=1;i<=nCoeffs;i++) {
2378         int j= s->intra_scantable.permutated[i];
2379         level = block[j];
2380         if (level) {
2381             if (level < 0) {
2382                 level = -level;
2383                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2384                 level = -level;
2385             } else {
2386                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2387             }
2388             block[j] = level;
2389         }
2390     }
2391 }
2392
2393 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
2394                                    DCTELEM *block, int n, int qscale)
2395 {
2396     int i, level, nCoeffs;
2397     const uint16_t *quant_matrix;
2398     int sum=-1;
2399
2400     if(s->alternate_scan) nCoeffs= 63;
2401     else nCoeffs= s->block_last_index[n];
2402
2403     if (n < 4)
2404         block[0] = block[0] * s->y_dc_scale;
2405     else
2406         block[0] = block[0] * s->c_dc_scale;
2407     quant_matrix = s->intra_matrix;
2408     for(i=1;i<=nCoeffs;i++) {
2409         int j= s->intra_scantable.permutated[i];
2410         level = block[j];
2411         if (level) {
2412             if (level < 0) {
2413                 level = -level;
2414                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2415                 level = -level;
2416             } else {
2417                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2418             }
2419             block[j] = level;
2420             sum+=level;
2421         }
2422     }
2423     block[63]^=sum&1;
2424 }
2425
2426 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
2427                                    DCTELEM *block, int n, int qscale)
2428 {
2429     int i, level, nCoeffs;
2430     const uint16_t *quant_matrix;
2431     int sum=-1;
2432
2433     if(s->alternate_scan) nCoeffs= 63;
2434     else nCoeffs= s->block_last_index[n];
2435
2436     quant_matrix = s->inter_matrix;
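    /* MPEG-2 mismatch control: track the parity of the coefficient sum and
     * toggle the LSB of the last coefficient below so that the sum ends up
     * odd. */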
2437     for(i=0; i<=nCoeffs; i++) {
2438         int j= s->intra_scantable.permutated[i];
2439         level = block[j];
2440         if (level) {
2441             if (level < 0) {
2442                 level = -level;
2443                 level = (((level << 1) + 1) * qscale *
2444                          ((int) (quant_matrix[j]))) >> 4;
2445                 level = -level;
2446             } else {
2447                 level = (((level << 1) + 1) * qscale *
2448                          ((int) (quant_matrix[j]))) >> 4;
2449             }
2450             block[j] = level;
2451             sum+=level;
2452         }
2453     }
2454     block[63]^=sum&1;
2455 }
2456
2457 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
2458                                   DCTELEM *block, int n, int qscale)
2459 {
2460     int i, level, qmul, qadd;
2461     int nCoeffs;
2462
2463     assert(s->block_last_index[n]>=0);
2464
2465     qmul = qscale << 1;
2466
2467     if (!s->h263_aic) {
2468         if (n < 4)
2469             block[0] = block[0] * s->y_dc_scale;
2470         else
2471             block[0] = block[0] * s->c_dc_scale;
2472         qadd = (qscale - 1) | 1;
2473     }else{
2474         qadd = 0;
2475     }
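    /* With AC prediction, predicted coefficients may lie beyond
     * block_last_index, so all 63 AC coefficients have to be processed. */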
2476     if(s->ac_pred)
2477         nCoeffs=63;
2478     else
2479         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2480
2481     for(i=1; i<=nCoeffs; i++) {
2482         level = block[i];
2483         if (level) {
2484             if (level < 0) {
2485                 level = level * qmul - qadd;
2486             } else {
2487                 level = level * qmul + qadd;
2488             }
2489             block[i] = level;
2490         }
2491     }
2492 }
2493
2494 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
2495                                   DCTELEM *block, int n, int qscale)
2496 {
2497     int i, level, qmul, qadd;
2498     int nCoeffs;
2499
2500     assert(s->block_last_index[n]>=0);
2501
2502     qadd = (qscale - 1) | 1;
2503     qmul = qscale << 1;
2504
2505     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2506
2507     for(i=0; i<=nCoeffs; i++) {
2508         level = block[i];
2509         if (level) {
2510             if (level < 0) {
2511                 level = level * qmul - qadd;
2512             } else {
2513                 level = level * qmul + qadd;
2514             }
2515             block[i] = level;
2516         }
2517     }
2518 }
2519
2520 /**
2521  * set qscale and update qscale dependent variables.
2522  */
2523 void ff_set_qscale(MpegEncContext * s, int qscale)
2524 {
2525     if (qscale < 1)
2526         qscale = 1;
2527     else if (qscale > 31)
2528         qscale = 31;
2529
2530     s->qscale = qscale;
2531     s->chroma_qscale= s->chroma_qscale_table[qscale];
2532
2533     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
2534     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
2535 }
2536
2537 void ff_MPV_report_decode_progress(MpegEncContext *s)
2538 {
2539     if (s->pict_type != AV_PICTURE_TYPE_B && !s->partitioned_frame && !s->error_occurred)
2540         ff_thread_report_progress(&s->current_picture_ptr->f, s->mb_y, 0);
2541 }