1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/imgutils.h"
32 #include "avcodec.h"
33 #include "dsputil.h"
34 #include "internal.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "mjpegenc.h"
38 #include "msmpeg4.h"
39 #include "faandct.h"
40 #include "xvmc_internal.h"
41 #include "thread.h"
42 #include <limits.h>
43
44 //#undef NDEBUG
45 //#include <assert.h>
46
47 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
48                                    DCTELEM *block, int n, int qscale);
49 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
50                                    DCTELEM *block, int n, int qscale);
51 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
52                                    DCTELEM *block, int n, int qscale);
53 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
54                                    DCTELEM *block, int n, int qscale);
55 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
56                                    DCTELEM *block, int n, int qscale);
57 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
58                                   DCTELEM *block, int n, int qscale);
59 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
60                                   DCTELEM *block, int n, int qscale);
61
62
63 /* enable all paranoid tests for rounding, overflows, etc... */
64 //#define PARANOID
65
66 //#define DEBUG
67
68
69 static const uint8_t ff_default_chroma_qscale_table[32] = {
70 //   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
71      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
72     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
73 };
74
75 const uint8_t ff_mpeg1_dc_scale_table[128] = {
76 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
77     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
78     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
79     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
80     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
81     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
82     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
83     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
84     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
85 };
86
87 static const uint8_t mpeg2_dc_scale_table1[128] = {
88 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
89     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
90     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
91     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
92     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
93     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
94     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
95     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
96     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
97 };
98
99 static const uint8_t mpeg2_dc_scale_table2[128] = {
100 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
101     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
103     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
104     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 };
110
111 static const uint8_t mpeg2_dc_scale_table3[128] = {
112 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
113     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
116     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
117     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
118     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
119     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
120     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
121 };
122
123 const uint8_t *const ff_mpeg2_dc_scale_table[4] = {
124     ff_mpeg1_dc_scale_table,
125     mpeg2_dc_scale_table1,
126     mpeg2_dc_scale_table2,
127     mpeg2_dc_scale_table3,
128 };
129
130 const enum PixelFormat ff_pixfmt_list_420[] = {
131     PIX_FMT_YUV420P,
132     PIX_FMT_NONE
133 };
134
135 const enum PixelFormat ff_hwaccel_pixfmt_list_420[] = {
136     PIX_FMT_DXVA2_VLD,
137     PIX_FMT_VAAPI_VLD,
138     PIX_FMT_VDA_VLD,
139     PIX_FMT_YUV420P,
140     PIX_FMT_NONE
141 };
142
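/**
 * Scan [p, end) for an MPEG-style start code (0x000001xx). The last four
 * bytes seen are carried in *state, so a start code split across buffer
 * boundaries is still found on the next call.
 * @return pointer just past the start code byte, or end if none was found;
 *         callers typically test (*state & 0xFFFFFF00) == 0x100 to see
 *         whether *state holds a complete start code.
 */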
143 const uint8_t *avpriv_mpv_find_start_code(const uint8_t *restrict p,
144                                           const uint8_t *end,
145                                           uint32_t * restrict state)
146 {
147     int i;
148
149     assert(p <= end);
150     if (p >= end)
151         return end;
152
153     for (i = 0; i < 3; i++) {
154         uint32_t tmp = *state << 8;
155         *state = tmp + *(p++);
156         if (tmp == 0x100 || p == end)
157             return p;
158     }
159
160     while (p < end) {
161         if      (p[-1] > 1      ) p += 3;
162         else if (p[-2]          ) p += 2;
163         else if (p[-3]|(p[-1]-1)) p++;
164         else {
165             p++;
166             break;
167         }
168     }
169
170     p = FFMIN(p, end) - 4;
171     *state = AV_RB32(p);
172
173     return p + 4;
174 }
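/* Illustrative sketch (not part of this file) of how a parser typically
 * walks a buffer with the rolling-state scanner above; buf/buf_size are
 * placeholder names.
 *
 *     uint32_t state = -1;
 *     const uint8_t *ptr = buf, *buf_end = buf + buf_size;
 *     while (ptr < buf_end) {
 *         ptr = avpriv_mpv_find_start_code(ptr, buf_end, &state);
 *         if ((state & 0xFFFFFF00) == 0x100) {
 *             int start_code = state & 0xFF; // e.g. 0xB3 = sequence header
 *             // handle start_code; ptr points just past it
 *         }
 *     }
 */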
175
176 /* init common dct for both encoder and decoder */
177 av_cold int ff_dct_common_init(MpegEncContext *s)
178 {
179     ff_dsputil_init(&s->dsp, s->avctx);
180
181     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
182     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
183     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
184     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
185     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
186     if (s->flags & CODEC_FLAG_BITEXACT)
187         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
188     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
189
190 #if HAVE_MMX
191     ff_MPV_common_init_mmx(s);
192 #elif ARCH_ALPHA
193     ff_MPV_common_init_axp(s);
194 #elif HAVE_MMI
195     ff_MPV_common_init_mmi(s);
196 #elif ARCH_ARM
197     ff_MPV_common_init_arm(s);
198 #elif HAVE_ALTIVEC
199     ff_MPV_common_init_altivec(s);
200 #elif ARCH_BFIN
201     ff_MPV_common_init_bfin(s);
202 #endif
203
204     /* load & permute scantables
205      * note: only wmv uses different ones
206      */
207     if (s->alternate_scan) {
208         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
209         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
210     } else {
211         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
212         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
213     }
214     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
215     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
216
217     return 0;
218 }
219
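/**
 * Shallow-copy a Picture and tag the copy as FF_BUFFER_TYPE_COPY so the
 * buffer release code knows the underlying frame data is owned elsewhere.
 */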
220 void ff_copy_picture(Picture *dst, Picture *src)
221 {
222     *dst = *src;
223     dst->f.type = FF_BUFFER_TYPE_COPY;
224 }
225
226 /**
227  * Release a frame buffer
228  */
229 static void free_frame_buffer(MpegEncContext *s, Picture *pic)
230 {
231     /* Windows Media Image codecs allocate internal buffers with different
232      * dimensions; ignore user defined callbacks for these
233      */
234     if (s->codec_id != CODEC_ID_WMV3IMAGE && s->codec_id != CODEC_ID_VC1IMAGE)
235         ff_thread_release_buffer(s->avctx, &pic->f);
236     else
237         avcodec_default_release_buffer(s->avctx, &pic->f);
238     av_freep(&pic->f.hwaccel_picture_private);
239 }
240
241 /**
242  * Allocate a frame buffer
243  */
244 static int alloc_frame_buffer(MpegEncContext *s, Picture *pic)
245 {
246     int r;
247
248     if (s->avctx->hwaccel) {
249         assert(!pic->f.hwaccel_picture_private);
250         if (s->avctx->hwaccel->priv_data_size) {
251             pic->f.hwaccel_picture_private = av_mallocz(s->avctx->hwaccel->priv_data_size);
252             if (!pic->f.hwaccel_picture_private) {
253                 av_log(s->avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n");
254                 return -1;
255             }
256         }
257     }
258
259     if (s->codec_id != CODEC_ID_WMV3IMAGE && s->codec_id != CODEC_ID_VC1IMAGE)
260         r = ff_thread_get_buffer(s->avctx, &pic->f);
261     else
262         r = avcodec_default_get_buffer(s->avctx, &pic->f);
263
264     if (r < 0 || !pic->f.type || !pic->f.data[0]) {
265         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %p)\n",
266                r, pic->f.type, pic->f.data[0]);
267         av_freep(&pic->f.hwaccel_picture_private);
268         return -1;
269     }
270
271     if (s->linesize && (s->linesize   != pic->f.linesize[0] ||
272                         s->uvlinesize != pic->f.linesize[1])) {
273         av_log(s->avctx, AV_LOG_ERROR,
274                "get_buffer() failed (stride changed)\n");
275         free_frame_buffer(s, pic);
276         return -1;
277     }
278
279     if (pic->f.linesize[1] != pic->f.linesize[2]) {
280         av_log(s->avctx, AV_LOG_ERROR,
281                "get_buffer() failed (uv stride mismatch)\n");
282         free_frame_buffer(s, pic);
283         return -1;
284     }
285
286     return 0;
287 }
288
289 /**
290  * Allocate a Picture.
291  * The pixels are allocated/set by calling get_buffer() if shared = 0
292  */
293 int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared)
294 {
295     const int big_mb_num = s->mb_stride * (s->mb_height + 1) + 1;
296
297     // the + 1 is needed so memset(,,stride*height) does not sig11
298
299     const int mb_array_size = s->mb_stride * s->mb_height;
300     const int b8_array_size = s->b8_stride * s->mb_height * 2;
301     const int b4_array_size = s->b4_stride * s->mb_height * 4;
302     int i;
303     int r = -1;
304
305     if (shared) {
306         assert(pic->f.data[0]);
307         assert(pic->f.type == 0 || pic->f.type == FF_BUFFER_TYPE_SHARED);
308         pic->f.type = FF_BUFFER_TYPE_SHARED;
309     } else {
310         assert(!pic->f.data[0]);
311
312         if (alloc_frame_buffer(s, pic) < 0)
313             return -1;
314
315         s->linesize   = pic->f.linesize[0];
316         s->uvlinesize = pic->f.linesize[1];
317     }
318
319     if (pic->f.qscale_table == NULL) {
320         if (s->encoding) {
321             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_var,
322                               mb_array_size * sizeof(int16_t), fail)
323             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mc_mb_var,
324                               mb_array_size * sizeof(int16_t), fail)
325             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_mean,
326                               mb_array_size * sizeof(int8_t ), fail)
327         }
328
329         FF_ALLOCZ_OR_GOTO(s->avctx, pic->f.mbskip_table,
330                           mb_array_size * sizeof(uint8_t) + 2, fail)// the + 2 is for the slice end check
331         FF_ALLOCZ_OR_GOTO(s->avctx, pic->qscale_table_base,
332                           (big_mb_num + s->mb_stride) * sizeof(uint8_t),
333                           fail)
334         FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_type_base,
335                           (big_mb_num + s->mb_stride) * sizeof(uint32_t),
336                           fail)
337         pic->f.mb_type = pic->mb_type_base + 2 * s->mb_stride + 1;
338         pic->f.qscale_table = pic->qscale_table_base + 2 * s->mb_stride + 1;
339         if (s->out_format == FMT_H264) {
340             for (i = 0; i < 2; i++) {
341                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i],
342                                   2 * (b4_array_size + 4) * sizeof(int16_t),
343                                   fail)
344                 pic->f.motion_val[i] = pic->motion_val_base[i] + 4;
345                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->f.ref_index[i],
346                                   4 * mb_array_size * sizeof(uint8_t), fail)
347             }
348             pic->f.motion_subsample_log2 = 2;
349         } else if (s->out_format == FMT_H263 || s->encoding ||
350                    (s->avctx->debug & FF_DEBUG_MV) || s->avctx->debug_mv) {
351             for (i = 0; i < 2; i++) {
352                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i],
353                                   2 * (b8_array_size + 4) * sizeof(int16_t),
354                                   fail)
355                 pic->f.motion_val[i] = pic->motion_val_base[i] + 4;
356                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->f.ref_index[i],
357                                   4 * mb_array_size * sizeof(uint8_t), fail)
358             }
359             pic->f.motion_subsample_log2 = 3;
360         }
361         if (s->avctx->debug&FF_DEBUG_DCT_COEFF) {
362             FF_ALLOCZ_OR_GOTO(s->avctx, pic->f.dct_coeff,
363                               64 * mb_array_size * sizeof(DCTELEM) * 6, fail)
364         }
365         pic->f.qstride = s->mb_stride;
366         FF_ALLOCZ_OR_GOTO(s->avctx, pic->f.pan_scan,
367                           1 * sizeof(AVPanScan), fail)
368     }
369
370     pic->owner2 = s;
371
372     return 0;
373 fail: // for  the FF_ALLOCZ_OR_GOTO macro
374     if (r >= 0)
375         free_frame_buffer(s, pic);
376     return -1;
377 }
378
379 /**
380  * Deallocate a picture.
381  */
382 static void free_picture(MpegEncContext *s, Picture *pic)
383 {
384     int i;
385
386     if (pic->f.data[0] && pic->f.type != FF_BUFFER_TYPE_SHARED) {
387         free_frame_buffer(s, pic);
388     }
389
390     av_freep(&pic->mb_var);
391     av_freep(&pic->mc_mb_var);
392     av_freep(&pic->mb_mean);
393     av_freep(&pic->f.mbskip_table);
394     av_freep(&pic->qscale_table_base);
395     av_freep(&pic->mb_type_base);
396     av_freep(&pic->f.dct_coeff);
397     av_freep(&pic->f.pan_scan);
398     pic->f.mb_type = NULL;
399     for (i = 0; i < 2; i++) {
400         av_freep(&pic->motion_val_base[i]);
401         av_freep(&pic->f.ref_index[i]);
402     }
403
404     if (pic->f.type == FF_BUFFER_TYPE_SHARED) {
405         for (i = 0; i < 4; i++) {
406             pic->f.base[i] =
407             pic->f.data[i] = NULL;
408         }
409         pic->f.type = 0;
410     }
411 }
412
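/**
 * Allocate the per-slice/per-thread scratch buffers: the edge emulation
 * buffer, the motion-estimation scratchpad, the DCT block storage and, for
 * FMT_H263, the AC prediction values. Freed via free_duplicate_context().
 */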
413 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base)
414 {
415     int y_size = s->b8_stride * (2 * s->mb_height + 1);
416     int c_size = s->mb_stride * (s->mb_height + 1);
417     int yc_size = y_size + 2 * c_size;
418     int i;
419
420     // edge emu needs blocksize + filter length - 1
421     // (= 17x17 for  halfpel / 21x21 for  h264)
422     FF_ALLOCZ_OR_GOTO(s->avctx, s->edge_emu_buffer,
423                       (s->width + 64) * 2 * 21 * 2, fail);    // (width + edge + align)*interlaced*MBsize*tolerance
424
425     // FIXME should be linesize instead of s->width * 2
426     // but that is not known before get_buffer()
427     FF_ALLOCZ_OR_GOTO(s->avctx, s->me.scratchpad,
428                       (s->width + 64) * 4 * 16 * 2 * sizeof(uint8_t), fail)
429     s->me.temp         = s->me.scratchpad;
430     s->rd_scratchpad   = s->me.scratchpad;
431     s->b_scratchpad    = s->me.scratchpad;
432     s->obmc_scratchpad = s->me.scratchpad + 16;
433     if (s->encoding) {
434         FF_ALLOCZ_OR_GOTO(s->avctx, s->me.map,
435                           ME_MAP_SIZE * sizeof(uint32_t), fail)
436         FF_ALLOCZ_OR_GOTO(s->avctx, s->me.score_map,
437                           ME_MAP_SIZE * sizeof(uint32_t), fail)
438         if (s->avctx->noise_reduction) {
439             FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_error_sum,
440                               2 * 64 * sizeof(int), fail)
441         }
442     }
443     FF_ALLOCZ_OR_GOTO(s->avctx, s->blocks, 64 * 12 * 2 * sizeof(DCTELEM), fail)
444     s->block = s->blocks[0];
445
446     for (i = 0; i < 12; i++) {
447         s->pblocks[i] = &s->block[i];
448     }
449
450     if (s->out_format == FMT_H263) {
451         /* ac values */
452         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_val_base,
453                           yc_size * sizeof(int16_t) * 16, fail);
454         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
455         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
456         s->ac_val[2] = s->ac_val[1] + c_size;
457     }
458
459     return 0;
460 fail:
461     return -1; // free() through ff_MPV_common_end()
462 }
463
464 static void free_duplicate_context(MpegEncContext *s)
465 {
466     if (s == NULL)
467         return;
468
469     av_freep(&s->edge_emu_buffer);
470     av_freep(&s->me.scratchpad);
471     s->me.temp =
472     s->rd_scratchpad =
473     s->b_scratchpad =
474     s->obmc_scratchpad = NULL;
475
476     av_freep(&s->dct_error_sum);
477     av_freep(&s->me.map);
478     av_freep(&s->me.score_map);
479     av_freep(&s->blocks);
480     av_freep(&s->ac_val_base);
481     s->block = NULL;
482 }
483
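/* Copy the per-thread pointers and state that must survive the wholesale
 * memcpy() in ff_update_duplicate_context() below (used both to back them
 * up and to restore them). */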
484 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src)
485 {
486 #define COPY(a) bak->a = src->a
487     COPY(edge_emu_buffer);
488     COPY(me.scratchpad);
489     COPY(me.temp);
490     COPY(rd_scratchpad);
491     COPY(b_scratchpad);
492     COPY(obmc_scratchpad);
493     COPY(me.map);
494     COPY(me.score_map);
495     COPY(blocks);
496     COPY(block);
497     COPY(start_mb_y);
498     COPY(end_mb_y);
499     COPY(me.map_generation);
500     COPY(pb);
501     COPY(dct_error_sum);
502     COPY(dct_count[0]);
503     COPY(dct_count[1]);
504     COPY(ac_val_base);
505     COPY(ac_val[0]);
506     COPY(ac_val[1]);
507     COPY(ac_val[2]);
508 #undef COPY
509 }
510
511 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src)
512 {
513     MpegEncContext bak;
514     int i;
515     // FIXME copy only needed parts
516     // START_TIMER
517     backup_duplicate_context(&bak, dst);
518     memcpy(dst, src, sizeof(MpegEncContext));
519     backup_duplicate_context(dst, &bak);
520     for (i = 0; i < 12; i++) {
521         dst->pblocks[i] = &dst->block[i];
522     }
523     // STOP_TIMER("update_duplicate_context")
524     // about 10k cycles / 0.01 sec for 1000 frames on 1 GHz with 2 threads
525 }
526
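/**
 * Copy decoder state from the source thread context to the destination one
 * for frame multithreading: the destination is initialized from the source
 * on first use, picture pointers are rebased into dst's own picture array
 * and any leftover bitstream buffer is duplicated.
 */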
527 int ff_mpeg_update_thread_context(AVCodecContext *dst,
528                                   const AVCodecContext *src)
529 {
530     MpegEncContext *s = dst->priv_data, *s1 = src->priv_data;
531
532     if (dst == src || !s1->context_initialized)
533         return 0;
534
535     // FIXME can parameters change on I-frames?
536     // in that case dst may need a reinit
537     if (!s->context_initialized) {
538         memcpy(s, s1, sizeof(MpegEncContext));
539
540         s->avctx                 = dst;
541         s->picture_range_start  += MAX_PICTURE_COUNT;
542         s->picture_range_end    += MAX_PICTURE_COUNT;
543         s->bitstream_buffer      = NULL;
544         s->bitstream_buffer_size = s->allocated_bitstream_buffer_size = 0;
545
546         ff_MPV_common_init(s);
547     }
548
549     s->avctx->coded_height  = s1->avctx->coded_height;
550     s->avctx->coded_width   = s1->avctx->coded_width;
551     s->avctx->width         = s1->avctx->width;
552     s->avctx->height        = s1->avctx->height;
553
554     s->coded_picture_number = s1->coded_picture_number;
555     s->picture_number       = s1->picture_number;
556     s->input_picture_number = s1->input_picture_number;
557
558     memcpy(s->picture, s1->picture, s1->picture_count * sizeof(Picture));
559     memcpy(&s->last_picture, &s1->last_picture,
560            (char *) &s1->last_picture_ptr - (char *) &s1->last_picture);
561
562     s->last_picture_ptr    = REBASE_PICTURE(s1->last_picture_ptr,    s, s1);
563     s->current_picture_ptr = REBASE_PICTURE(s1->current_picture_ptr, s, s1);
564     s->next_picture_ptr    = REBASE_PICTURE(s1->next_picture_ptr,    s, s1);
565
566     // Error/bug resilience
567     s->next_p_frame_damaged = s1->next_p_frame_damaged;
568     s->workaround_bugs      = s1->workaround_bugs;
569
570     // MPEG4 timing info
571     memcpy(&s->time_increment_bits, &s1->time_increment_bits,
572            (char *) &s1->shape - (char *) &s1->time_increment_bits);
573
574     // B-frame info
575     s->max_b_frames = s1->max_b_frames;
576     s->low_delay    = s1->low_delay;
577     s->dropable     = s1->dropable;
578
579     // DivX handling (doesn't work)
580     s->divx_packed  = s1->divx_packed;
581
582     if (s1->bitstream_buffer) {
583         if (s1->bitstream_buffer_size +
584             FF_INPUT_BUFFER_PADDING_SIZE > s->allocated_bitstream_buffer_size)
585             av_fast_malloc(&s->bitstream_buffer,
586                            &s->allocated_bitstream_buffer_size,
587                            s1->allocated_bitstream_buffer_size);
588         s->bitstream_buffer_size = s1->bitstream_buffer_size;
589         memcpy(s->bitstream_buffer, s1->bitstream_buffer,
590                s1->bitstream_buffer_size);
591         memset(s->bitstream_buffer + s->bitstream_buffer_size, 0,
592                FF_INPUT_BUFFER_PADDING_SIZE);
593     }
594
595     // MPEG2/interlacing info
596     memcpy(&s->progressive_sequence, &s1->progressive_sequence,
597            (char *) &s1->rtp_mode - (char *) &s1->progressive_sequence);
598
599     if (!s1->first_field) {
600         s->last_pict_type = s1->pict_type;
601         if (s1->current_picture_ptr)
602             s->last_lambda_for[s1->pict_type] = s1->current_picture_ptr->f.quality;
603
604         if (s1->pict_type != AV_PICTURE_TYPE_B) {
605             s->last_non_b_pict_type = s1->pict_type;
606         }
607     }
608
609     return 0;
610 }
611
612 /**
613  * Set the given MpegEncContext to common defaults
614  * (same for encoding and decoding).
615  * The changed fields will not depend upon the
616  * prior state of the MpegEncContext.
617  */
618 void ff_MPV_common_defaults(MpegEncContext *s)
619 {
620     s->y_dc_scale_table      =
621     s->c_dc_scale_table      = ff_mpeg1_dc_scale_table;
622     s->chroma_qscale_table   = ff_default_chroma_qscale_table;
623     s->progressive_frame     = 1;
624     s->progressive_sequence  = 1;
625     s->picture_structure     = PICT_FRAME;
626
627     s->coded_picture_number  = 0;
628     s->picture_number        = 0;
629     s->input_picture_number  = 0;
630
631     s->picture_in_gop_number = 0;
632
633     s->f_code                = 1;
634     s->b_code                = 1;
635
636     s->picture_range_start   = 0;
637     s->picture_range_end     = MAX_PICTURE_COUNT;
638
639     s->slice_context_count   = 1;
640 }
641
642 /**
643  * Set the given MpegEncContext to defaults for decoding.
644  * the changed fields will not depend upon
645  * the prior state of the MpegEncContext.
646  */
647 void ff_MPV_decode_defaults(MpegEncContext *s)
648 {
649     ff_MPV_common_defaults(s);
650 }
651
652 /**
653  * init common structure for both encoder and decoder.
654  * this assumes that some variables like width/height are already set
655  */
656 av_cold int ff_MPV_common_init(MpegEncContext *s)
657 {
658     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
659     int nb_slices = (HAVE_THREADS &&
660                      s->avctx->active_thread_type & FF_THREAD_SLICE) ?
661                     s->avctx->thread_count : 1;
662
663     if (s->encoding && s->avctx->slices)
664         nb_slices = s->avctx->slices;
665
666     if (s->codec_id == CODEC_ID_MPEG2VIDEO && !s->progressive_sequence)
667         s->mb_height = (s->height + 31) / 32 * 2;
668     else if (s->codec_id != CODEC_ID_H264)
669         s->mb_height = (s->height + 15) / 16;
670
671     if (s->avctx->pix_fmt == PIX_FMT_NONE) {
672         av_log(s->avctx, AV_LOG_ERROR,
673                "decoding to PIX_FMT_NONE is not supported.\n");
674         return -1;
675     }
676
677     if (nb_slices > MAX_THREADS || (nb_slices > s->mb_height && s->mb_height)) {
678         int max_slices;
679         if (s->mb_height)
680             max_slices = FFMIN(MAX_THREADS, s->mb_height);
681         else
682             max_slices = MAX_THREADS;
683         av_log(s->avctx, AV_LOG_WARNING, "too many threads/slices (%d),"
684                " reducing to %d\n", nb_slices, max_slices);
685         nb_slices = max_slices;
686     }
687
688     if ((s->width || s->height) &&
689         av_image_check_size(s->width, s->height, 0, s->avctx))
690         return -1;
691
692     ff_dct_common_init(s);
693
694     s->flags  = s->avctx->flags;
695     s->flags2 = s->avctx->flags2;
696
697     if (s->width && s->height) {
698         s->mb_width   = (s->width + 15) / 16;
699         s->mb_stride  = s->mb_width + 1;
700         s->b8_stride  = s->mb_width * 2 + 1;
701         s->b4_stride  = s->mb_width * 4 + 1;
702         mb_array_size = s->mb_height * s->mb_stride;
703         mv_table_size = (s->mb_height + 2) * s->mb_stride + 1;
704
705         /* set chroma shifts */
706         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &s->chroma_x_shift,
707                                       &s->chroma_y_shift);
708
709         /* set default edge pos, will be overridden
710          * in decode_header if needed */
711         s->h_edge_pos = s->mb_width * 16;
712         s->v_edge_pos = s->mb_height * 16;
713
714         s->mb_num     = s->mb_width * s->mb_height;
715
716         s->block_wrap[0] =
717         s->block_wrap[1] =
718         s->block_wrap[2] =
719         s->block_wrap[3] = s->b8_stride;
720         s->block_wrap[4] =
721         s->block_wrap[5] = s->mb_stride;
722
723         y_size  = s->b8_stride * (2 * s->mb_height + 1);
724         c_size  = s->mb_stride * (s->mb_height + 1);
725         yc_size = y_size + 2   * c_size;
726
727         /* convert fourcc to upper case */
728         s->codec_tag          = avpriv_toupper4(s->avctx->codec_tag);
729
730         s->stream_codec_tag   = avpriv_toupper4(s->avctx->stream_codec_tag);
731
732         s->avctx->coded_frame = &s->current_picture.f;
733
734         FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_index2xy, (s->mb_num + 1) * sizeof(int),
735                           fail); // error resilience code looks cleaner with this
736         for (y = 0; y < s->mb_height; y++)
737             for (x = 0; x < s->mb_width; x++)
738                 s->mb_index2xy[x + y * s->mb_width] = x + y * s->mb_stride;
739
740         s->mb_index2xy[s->mb_height * s->mb_width] =
741                        (s->mb_height - 1) * s->mb_stride + s->mb_width; // FIXME really needed?
742
743         if (s->encoding) {
744             /* Allocate MV tables */
745             FF_ALLOCZ_OR_GOTO(s->avctx, s->p_mv_table_base,
746                               mv_table_size * 2 * sizeof(int16_t), fail);
747             FF_ALLOCZ_OR_GOTO(s->avctx, s->b_forw_mv_table_base,
748                               mv_table_size * 2 * sizeof(int16_t), fail);
749             FF_ALLOCZ_OR_GOTO(s->avctx, s->b_back_mv_table_base,
750                               mv_table_size * 2 * sizeof(int16_t), fail);
751             FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_forw_mv_table_base,
752                               mv_table_size * 2 * sizeof(int16_t), fail);
753             FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_back_mv_table_base,
754                               mv_table_size * 2 * sizeof(int16_t), fail);
755             FF_ALLOCZ_OR_GOTO(s->avctx, s->b_direct_mv_table_base,
756                               mv_table_size * 2 * sizeof(int16_t), fail);
757             s->p_mv_table            = s->p_mv_table_base +
758                                        s->mb_stride + 1;
759             s->b_forw_mv_table       = s->b_forw_mv_table_base +
760                                        s->mb_stride + 1;
761             s->b_back_mv_table       = s->b_back_mv_table_base +
762                                        s->mb_stride + 1;
763             s->b_bidir_forw_mv_table = s->b_bidir_forw_mv_table_base +
764                                        s->mb_stride + 1;
765             s->b_bidir_back_mv_table = s->b_bidir_back_mv_table_base +
766                                        s->mb_stride + 1;
767             s->b_direct_mv_table     = s->b_direct_mv_table_base +
768                                        s->mb_stride + 1;
769
770             if (s->msmpeg4_version) {
771                 FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
772                                   2 * 2 * (MAX_LEVEL + 1) *
773                                   (MAX_RUN + 1) * 2 * sizeof(int), fail);
774             }
775             FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
776
777             /* Allocate MB type table */
778             FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_type, mb_array_size *
779                               sizeof(uint16_t), fail); // needed for encoding
780
781             FF_ALLOCZ_OR_GOTO(s->avctx, s->lambda_table, mb_array_size *
782                               sizeof(int), fail);
783
784             FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,
785                               64 * 32   * sizeof(int), fail);
786             FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,
787                               64 * 32   * sizeof(int), fail);
788             FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16,
789                               64 * 32 * 2 * sizeof(uint16_t), fail);
790             FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16,
791                               64 * 32 * 2 * sizeof(uint16_t), fail);
792             FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
793                               MAX_PICTURE_COUNT * sizeof(Picture *), fail);
794             FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
795                               MAX_PICTURE_COUNT * sizeof(Picture *), fail);
796
797             if (s->avctx->noise_reduction) {
798                 FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
799                                   2 * 64 * sizeof(uint16_t), fail);
800             }
801
802             FF_ALLOC_OR_GOTO(s->avctx, s->cplx_tab,
803                              mb_array_size * sizeof(float), fail);
804             FF_ALLOC_OR_GOTO(s->avctx, s->bits_tab,
805                              mb_array_size * sizeof(float), fail);
806         }
807     }
808
809     s->picture_count = MAX_PICTURE_COUNT * FFMAX(1, s->avctx->thread_count);
810     FF_ALLOCZ_OR_GOTO(s->avctx, s->picture,
811                       s->picture_count * sizeof(Picture), fail);
812     for (i = 0; i < s->picture_count; i++) {
813         avcodec_get_frame_defaults(&s->picture[i].f);
814     }
815
816     if (s->width && s->height) {
817         FF_ALLOC_OR_GOTO(s->avctx, s->er_temp_buffer,
818                          mb_array_size * sizeof(uint8_t), fail);
819         FF_ALLOCZ_OR_GOTO(s->avctx, s->error_status_table,
820                           mb_array_size * sizeof(uint8_t), fail);
821
822         if (s->codec_id == CODEC_ID_MPEG4 ||
823             (s->flags & CODEC_FLAG_INTERLACED_ME)) {
824             /* interlaced direct mode decoding tables */
825             for (i = 0; i < 2; i++) {
826                 int j, k;
827                 for (j = 0; j < 2; j++) {
828                     for (k = 0; k < 2; k++) {
829                         FF_ALLOCZ_OR_GOTO(s->avctx,
830                                           s->b_field_mv_table_base[i][j][k],
831                                           mv_table_size * 2 * sizeof(int16_t),
832                                           fail);
833                         s->b_field_mv_table[i][j][k] = s->b_field_mv_table_base[i][j][k] +
834                                                        s->mb_stride + 1;
835                     }
836                     FF_ALLOCZ_OR_GOTO(s->avctx, s->b_field_select_table [i][j],
837                                       mb_array_size * 2 * sizeof(uint8_t),
838                                       fail);
839                     FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_mv_table_base[i][j],
840                                       mv_table_size * 2 * sizeof(int16_t),
841                                       fail);
842                     s->p_field_mv_table[i][j] = s->p_field_mv_table_base[i][j]
843                                                 + s->mb_stride + 1;
844                 }
845                 FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_select_table[i],
846                                   mb_array_size * 2 * sizeof(uint8_t),
847                                   fail);
848             }
849         }
850         if (s->out_format == FMT_H263) {
851             /* cbp values */
852             FF_ALLOCZ_OR_GOTO(s->avctx, s->coded_block_base, y_size, fail);
853             s->coded_block = s->coded_block_base + s->b8_stride + 1;
854
855             /* cbp, ac_pred, pred_dir */
856             FF_ALLOCZ_OR_GOTO(s->avctx, s->cbp_table,
857                               mb_array_size * sizeof(uint8_t), fail);
858             FF_ALLOCZ_OR_GOTO(s->avctx, s->pred_dir_table,
859                               mb_array_size * sizeof(uint8_t), fail);
860         }
861
862         if (s->h263_pred || s->h263_plus || !s->encoding) {
863             /* dc values */
864             // MN: we need these for  error resilience of intra-frames
865             FF_ALLOCZ_OR_GOTO(s->avctx, s->dc_val_base,
866                               yc_size * sizeof(int16_t), fail);
867             s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
868             s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
869             s->dc_val[2] = s->dc_val[1] + c_size;
870             for (i = 0; i < yc_size; i++)
871                 s->dc_val_base[i] = 1024;
872         }
873
874         /* which mb is a intra block */
875         FF_ALLOCZ_OR_GOTO(s->avctx, s->mbintra_table, mb_array_size, fail);
876         memset(s->mbintra_table, 1, mb_array_size);
877
878         /* init macroblock skip table */
879         FF_ALLOCZ_OR_GOTO(s->avctx, s->mbskip_table, mb_array_size + 2, fail);
880         // Note the + 1 is for  a quicker mpeg4 slice_end detection
881
882         s->parse_context.state = -1;
883         if ((s->avctx->debug & (FF_DEBUG_VIS_QP | FF_DEBUG_VIS_MB_TYPE)) ||
884             s->avctx->debug_mv) {
885             s->visualization_buffer[0] = av_malloc((s->mb_width * 16 +
886                         2 * EDGE_WIDTH) * s->mb_height * 16 + 2 * EDGE_WIDTH);
887             s->visualization_buffer[1] = av_malloc((s->mb_width * 16 +
888                         2 * EDGE_WIDTH) * s->mb_height * 16 + 2 * EDGE_WIDTH);
889             s->visualization_buffer[2] = av_malloc((s->mb_width * 16 +
890                         2 * EDGE_WIDTH) * s->mb_height * 16 + 2 * EDGE_WIDTH);
891         }
892     }
893
894     s->context_initialized = 1;
895     s->thread_context[0]   = s;
896
897     if (s->width && s->height) {
898         if (nb_slices > 1) {
899             for (i = 1; i < nb_slices; i++) {
900                 s->thread_context[i] = av_malloc(sizeof(MpegEncContext));
901                 memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
902             }
903
904             for (i = 0; i < nb_slices; i++) {
905                 if (init_duplicate_context(s->thread_context[i], s) < 0)
906                     goto fail;
907                 s->thread_context[i]->start_mb_y =
908                     (s->mb_height * (i) + nb_slices / 2) / nb_slices;
909                 s->thread_context[i]->end_mb_y   =
910                     (s->mb_height * (i + 1) + nb_slices / 2) / nb_slices;
911             }
912         } else {
913             if (init_duplicate_context(s, s) < 0)
914                 goto fail;
915             s->start_mb_y = 0;
916             s->end_mb_y   = s->mb_height;
917         }
918         s->slice_context_count = nb_slices;
919     }
920
921     return 0;
922  fail:
923     ff_MPV_common_end(s);
924     return -1;
925 }
926
927 /* init common structure for both encoder and decoder */
928 void ff_MPV_common_end(MpegEncContext *s)
929 {
930     int i, j, k;
931
932     if (s->slice_context_count > 1) {
933         for (i = 0; i < s->slice_context_count; i++) {
934             free_duplicate_context(s->thread_context[i]);
935         }
936         for (i = 1; i < s->slice_context_count; i++) {
937             av_freep(&s->thread_context[i]);
938         }
939         s->slice_context_count = 1;
940     } else free_duplicate_context(s);
941
942     av_freep(&s->parse_context.buffer);
943     s->parse_context.buffer_size = 0;
944
945     av_freep(&s->mb_type);
946     av_freep(&s->p_mv_table_base);
947     av_freep(&s->b_forw_mv_table_base);
948     av_freep(&s->b_back_mv_table_base);
949     av_freep(&s->b_bidir_forw_mv_table_base);
950     av_freep(&s->b_bidir_back_mv_table_base);
951     av_freep(&s->b_direct_mv_table_base);
952     s->p_mv_table            = NULL;
953     s->b_forw_mv_table       = NULL;
954     s->b_back_mv_table       = NULL;
955     s->b_bidir_forw_mv_table = NULL;
956     s->b_bidir_back_mv_table = NULL;
957     s->b_direct_mv_table     = NULL;
958     for (i = 0; i < 2; i++) {
959         for (j = 0; j < 2; j++) {
960             for (k = 0; k < 2; k++) {
961                 av_freep(&s->b_field_mv_table_base[i][j][k]);
962                 s->b_field_mv_table[i][j][k] = NULL;
963             }
964             av_freep(&s->b_field_select_table[i][j]);
965             av_freep(&s->p_field_mv_table_base[i][j]);
966             s->p_field_mv_table[i][j] = NULL;
967         }
968         av_freep(&s->p_field_select_table[i]);
969     }
970
971     av_freep(&s->dc_val_base);
972     av_freep(&s->coded_block_base);
973     av_freep(&s->mbintra_table);
974     av_freep(&s->cbp_table);
975     av_freep(&s->pred_dir_table);
976
977     av_freep(&s->mbskip_table);
978     av_freep(&s->bitstream_buffer);
979     s->allocated_bitstream_buffer_size = 0;
980
981     av_freep(&s->avctx->stats_out);
982     av_freep(&s->ac_stats);
983     av_freep(&s->error_status_table);
984     av_freep(&s->er_temp_buffer);
985     av_freep(&s->mb_index2xy);
986     av_freep(&s->lambda_table);
987     av_freep(&s->q_intra_matrix);
988     av_freep(&s->q_inter_matrix);
989     av_freep(&s->q_intra_matrix16);
990     av_freep(&s->q_inter_matrix16);
991     av_freep(&s->input_picture);
992     av_freep(&s->reordered_input_picture);
993     av_freep(&s->dct_offset);
994     av_freep(&s->cplx_tab);
995     av_freep(&s->bits_tab);
996
997     if (s->picture && !s->avctx->internal->is_copy) {
998         for (i = 0; i < s->picture_count; i++) {
999             free_picture(s, &s->picture[i]);
1000         }
1001     }
1002     av_freep(&s->picture);
1003     s->context_initialized      = 0;
1004     s->last_picture_ptr         =
1005     s->next_picture_ptr         =
1006     s->current_picture_ptr      = NULL;
1007     s->linesize = s->uvlinesize = 0;
1008
1009     for (i = 0; i < 3; i++)
1010         av_freep(&s->visualization_buffer[i]);
1011
1012     if (!(s->avctx->active_thread_type & FF_THREAD_FRAME))
1013         avcodec_default_free_buffers(s->avctx);
1014 }
1015
1016 void ff_init_rl(RLTable *rl,
1017                 uint8_t static_store[2][2 * MAX_RUN + MAX_LEVEL + 3])
1018 {
1019     int8_t  max_level[MAX_RUN + 1], max_run[MAX_LEVEL + 1];
1020     uint8_t index_run[MAX_RUN + 1];
1021     int last, run, level, start, end, i;
1022
1023     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1024     if (static_store && rl->max_level[0])
1025         return;
1026
1027     /* compute max_level[], max_run[] and index_run[] */
1028     for (last = 0; last < 2; last++) {
1029         if (last == 0) {
1030             start = 0;
1031             end = rl->last;
1032         } else {
1033             start = rl->last;
1034             end = rl->n;
1035         }
1036
1037         memset(max_level, 0, MAX_RUN + 1);
1038         memset(max_run, 0, MAX_LEVEL + 1);
1039         memset(index_run, rl->n, MAX_RUN + 1);
1040         for (i = start; i < end; i++) {
1041             run   = rl->table_run[i];
1042             level = rl->table_level[i];
1043             if (index_run[run] == rl->n)
1044                 index_run[run] = i;
1045             if (level > max_level[run])
1046                 max_level[run] = level;
1047             if (run > max_run[level])
1048                 max_run[level] = run;
1049         }
1050         if (static_store)
1051             rl->max_level[last] = static_store[last];
1052         else
1053             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1054         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1055         if (static_store)
1056             rl->max_run[last]   = static_store[last] + MAX_RUN + 1;
1057         else
1058             rl->max_run[last]   = av_malloc(MAX_LEVEL + 1);
1059         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1060         if (static_store)
1061             rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
1062         else
1063             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1064         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1065     }
1066 }
1067
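/* Build the per-qscale RL VLC tables: the level is stored already
 * dequantized (level * qmul + qadd), run 66 marks escape/illegal codes, and
 * codes at or beyond rl->last get their run offset by 192 to flag "last"
 * coefficients. */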
1068 void ff_init_vlc_rl(RLTable *rl)
1069 {
1070     int i, q;
1071
1072     for (q = 0; q < 32; q++) {
1073         int qmul = q * 2;
1074         int qadd = (q - 1) | 1;
1075
1076         if (q == 0) {
1077             qmul = 1;
1078             qadd = 0;
1079         }
1080         for (i = 0; i < rl->vlc.table_size; i++) {
1081             int code = rl->vlc.table[i][0];
1082             int len  = rl->vlc.table[i][1];
1083             int level, run;
1084
1085             if (len == 0) { // illegal code
1086                 run   = 66;
1087                 level = MAX_LEVEL;
1088             } else if (len < 0) { // more bits needed
1089                 run   = 0;
1090                 level = code;
1091             } else {
1092                 if (code == rl->n) { // esc
1093                     run   = 66;
1094                     level =  0;
1095                 } else {
1096                     run   = rl->table_run[code] + 1;
1097                     level = rl->table_level[code] * qmul + qadd;
1098                     if (code >= rl->last) run += 192;
1099                 }
1100             }
1101             rl->rl_vlc[q][i].len   = len;
1102             rl->rl_vlc[q][i].level = level;
1103             rl->rl_vlc[q][i].run   = run;
1104         }
1105     }
1106 }
1107
1108 void ff_release_unused_pictures(MpegEncContext*s, int remove_current)
1109 {
1110     int i;
1111
1112     /* release non reference frames */
1113     for (i = 0; i < s->picture_count; i++) {
1114         if (s->picture[i].f.data[0] && !s->picture[i].f.reference &&
1115             (!s->picture[i].owner2 || s->picture[i].owner2 == s) &&
1116             (remove_current || &s->picture[i] !=  s->current_picture_ptr)
1117             /* && s->picture[i].type!= FF_BUFFER_TYPE_SHARED */) {
1118             free_frame_buffer(s, &s->picture[i]);
1119         }
1120     }
1121 }
1122
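/**
 * Find a free slot in s->picture[] within [picture_range_start,
 * picture_range_end). For shared pictures only untyped slots qualify;
 * otherwise previously used slots are preferred. Returns the index, or
 * AVERROR_INVALIDDATA if every slot is taken.
 */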
1123 int ff_find_unused_picture(MpegEncContext *s, int shared)
1124 {
1125     int i;
1126
1127     if (shared) {
1128         for (i = s->picture_range_start; i < s->picture_range_end; i++) {
1129             if (s->picture[i].f.data[0] == NULL && s->picture[i].f.type == 0)
1130                 return i;
1131         }
1132     } else {
1133         for (i = s->picture_range_start; i < s->picture_range_end; i++) {
1134             if (s->picture[i].f.data[0] == NULL && s->picture[i].f.type != 0)
1135                 return i; // FIXME
1136         }
1137         for (i = s->picture_range_start; i < s->picture_range_end; i++) {
1138             if (s->picture[i].f.data[0] == NULL)
1139                 return i;
1140         }
1141     }
1142
1143     return AVERROR_INVALIDDATA;
1144 }
1145
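/* Refresh the per-coefficient noise-reduction offsets from the accumulated
 * DCT error sums; both accumulators are halved once the block count passes
 * 1 << 16 so they behave as a running average. */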
1146 static void update_noise_reduction(MpegEncContext *s)
1147 {
1148     int intra, i;
1149
1150     for (intra = 0; intra < 2; intra++) {
1151         if (s->dct_count[intra] > (1 << 16)) {
1152             for (i = 0; i < 64; i++) {
1153                 s->dct_error_sum[intra][i] >>= 1;
1154             }
1155             s->dct_count[intra] >>= 1;
1156         }
1157
1158         for (i = 0; i < 64; i++) {
1159             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1160                                        s->dct_count[intra] +
1161                                        s->dct_error_sum[intra][i] / 2) /
1162                                       (s->dct_error_sum[intra][i] + 1);
1163         }
1164     }
1165 }
1166
1167 /**
1168  * generic function for encode/decode called after coding/decoding
1169  * the header and before a frame is coded/decoded.
1170  */
1171 int ff_MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1172 {
1173     int i;
1174     Picture *pic;
1175     s->mb_skipped = 0;
1176
1177     assert(s->last_picture_ptr == NULL || s->out_format != FMT_H264 ||
1178            s->codec_id == CODEC_ID_SVQ3);
1179
1180     /* mark & release old frames */
1181     if (s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3) {
1182         if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1183             s->last_picture_ptr != s->next_picture_ptr &&
1184             s->last_picture_ptr->f.data[0]) {
1185             if (s->last_picture_ptr->owner2 == s)
1186                 free_frame_buffer(s, s->last_picture_ptr);
1187         }
1188
1189         /* release forgotten pictures */
1190         /* if (mpeg124/h263) */
1191         if (!s->encoding) {
1192             for (i = 0; i < s->picture_count; i++) {
1193                 if (s->picture[i].owner2 == s && s->picture[i].f.data[0] &&
1194                     &s->picture[i] != s->last_picture_ptr &&
1195                     &s->picture[i] != s->next_picture_ptr &&
1196                     s->picture[i].f.reference) {
1197                     if (!(avctx->active_thread_type & FF_THREAD_FRAME))
1198                         av_log(avctx, AV_LOG_ERROR,
1199                                "releasing zombie picture\n");
1200                     free_frame_buffer(s, &s->picture[i]);
1201                 }
1202             }
1203         }
1204     }
1205
1206     if (!s->encoding) {
1207         ff_release_unused_pictures(s, 1);
1208
1209         if (s->current_picture_ptr &&
1210             s->current_picture_ptr->f.data[0] == NULL) {
1211             // we already have an unused image
1212             // (maybe it was set before reading the header)
1213             pic = s->current_picture_ptr;
1214         } else {
1215             i   = ff_find_unused_picture(s, 0);
1216             pic = &s->picture[i];
1217         }
1218
1219         pic->f.reference = 0;
1220         if (!s->dropable) {
1221             if (s->codec_id == CODEC_ID_H264)
1222                 pic->f.reference = s->picture_structure;
1223             else if (s->pict_type != AV_PICTURE_TYPE_B)
1224                 pic->f.reference = 3;
1225         }
1226
1227         pic->f.coded_picture_number = s->coded_picture_number++;
1228
1229         if (ff_alloc_picture(s, pic, 0) < 0)
1230             return -1;
1231
1232         s->current_picture_ptr = pic;
1233         // FIXME use only the vars from current_pic
1234         s->current_picture_ptr->f.top_field_first = s->top_field_first;
1235         if (s->codec_id == CODEC_ID_MPEG1VIDEO ||
1236             s->codec_id == CODEC_ID_MPEG2VIDEO) {
1237             if (s->picture_structure != PICT_FRAME)
1238                 s->current_picture_ptr->f.top_field_first =
1239                     (s->picture_structure == PICT_TOP_FIELD) == s->first_field;
1240         }
1241         s->current_picture_ptr->f.interlaced_frame = !s->progressive_frame &&
1242                                                      !s->progressive_sequence;
1243         s->current_picture_ptr->field_picture      =  s->picture_structure != PICT_FRAME;
1244     }
1245
1246     s->current_picture_ptr->f.pict_type = s->pict_type;
1247     // if (s->flags && CODEC_FLAG_QSCALE)
1248     //     s->current_picture_ptr->quality = s->new_picture_ptr->quality;
1249     s->current_picture_ptr->f.key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1250
1251     ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1252
1253     if (s->pict_type != AV_PICTURE_TYPE_B) {
1254         s->last_picture_ptr = s->next_picture_ptr;
1255         if (!s->dropable)
1256             s->next_picture_ptr = s->current_picture_ptr;
1257     }
1258     /* av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n",
1259            s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1260            s->last_picture_ptr    ? s->last_picture_ptr->f.data[0]    : NULL,
1261            s->next_picture_ptr    ? s->next_picture_ptr->f.data[0]    : NULL,
1262            s->current_picture_ptr ? s->current_picture_ptr->f.data[0] : NULL,
1263            s->pict_type, s->dropable); */
1264
1265     if (s->codec_id != CODEC_ID_H264) {
1266         if ((s->last_picture_ptr == NULL ||
1267              s->last_picture_ptr->f.data[0] == NULL) &&
1268             (s->pict_type != AV_PICTURE_TYPE_I ||
1269              s->picture_structure != PICT_FRAME)) {
1270             if (s->pict_type != AV_PICTURE_TYPE_I)
1271                 av_log(avctx, AV_LOG_ERROR,
1272                        "warning: first frame is no keyframe\n");
1273             else if (s->picture_structure != PICT_FRAME)
1274                 av_log(avctx, AV_LOG_INFO,
1275                        "allocate dummy last picture for field based first keyframe\n");
1276
1277             /* Allocate a dummy frame */
1278             i = ff_find_unused_picture(s, 0);
1279             s->last_picture_ptr = &s->picture[i];
1280             if (ff_alloc_picture(s, s->last_picture_ptr, 0) < 0) {
1281                 s->last_picture_ptr = NULL;
1282                 return -1;
1283             }
1284             ff_thread_report_progress(&s->last_picture_ptr->f, INT_MAX, 0);
1285             ff_thread_report_progress(&s->last_picture_ptr->f, INT_MAX, 1);
1286             s->last_picture_ptr->f.reference = 3;
1287         }
1288         if ((s->next_picture_ptr == NULL ||
1289              s->next_picture_ptr->f.data[0] == NULL) &&
1290             s->pict_type == AV_PICTURE_TYPE_B) {
1291             /* Allocate a dummy frame */
1292             i = ff_find_unused_picture(s, 0);
1293             s->next_picture_ptr = &s->picture[i];
1294             if (ff_alloc_picture(s, s->next_picture_ptr, 0) < 0) {
1295                 s->next_picture_ptr = NULL;
1296                 return -1;
1297             }
1298             ff_thread_report_progress(&s->next_picture_ptr->f, INT_MAX, 0);
1299             ff_thread_report_progress(&s->next_picture_ptr->f, INT_MAX, 1);
1300             s->next_picture_ptr->f.reference = 3;
1301         }
1302     }
1303
1304     if (s->last_picture_ptr)
1305         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
1306     if (s->next_picture_ptr)
1307         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
1308
1309     if (HAVE_THREADS && (avctx->active_thread_type & FF_THREAD_FRAME) &&
1310         (s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3)) {
1311         if (s->next_picture_ptr)
1312             s->next_picture_ptr->owner2 = s;
1313         if (s->last_picture_ptr)
1314             s->last_picture_ptr->owner2 = s;
1315     }
1316
1317     assert(s->pict_type == AV_PICTURE_TYPE_I || (s->last_picture_ptr &&
1318                                                  s->last_picture_ptr->f.data[0]));
1319
1320     if (s->picture_structure!= PICT_FRAME && s->out_format != FMT_H264) {
1321         int i;
1322         for (i = 0; i < 4; i++) {
1323             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1324                 s->current_picture.f.data[i] +=
1325                     s->current_picture.f.linesize[i];
1326             }
1327             s->current_picture.f.linesize[i] *= 2;
1328             s->last_picture.f.linesize[i]    *= 2;
1329             s->next_picture.f.linesize[i]    *= 2;
1330         }
1331     }
1332
1333     s->err_recognition = avctx->err_recognition;
1334
1335     /* set dequantizer, we can't do it during init as
1336      * it might change for mpeg4 and we can't do it in the header
1337      * decode as init is not called for mpeg4 there yet */
1338     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO) {
1339         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1340         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1341     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1342         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1343         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1344     } else {
1345         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1346         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1347     }
1348
1349     if (s->dct_error_sum) {
1350         assert(s->avctx->noise_reduction && s->encoding);
1351         update_noise_reduction(s);
1352     }
1353
1354     if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration)
1355         return ff_xvmc_field_start(s, avctx);
1356
1357     return 0;
1358 }
1359
1360 /* generic function for encode/decode called after a
1361  * frame has been coded/decoded. */
1362 void ff_MPV_frame_end(MpegEncContext *s)
1363 {
1364     int i;
1365     /* redraw edges for the frame if decoding didn't complete */
1366     // just to make sure that all data is rendered.
1367     if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration) {
1368         ff_xvmc_field_end(s);
1369    } else if ((s->error_count || s->encoding) &&
1370               !s->avctx->hwaccel &&
1371               !(s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) &&
1372               s->unrestricted_mv &&
1373               s->current_picture.f.reference &&
1374               !s->intra_only &&
1375               !(s->flags & CODEC_FLAG_EMU_EDGE)) {
1376         int hshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_w;
1377         int vshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_h;
1378         s->dsp.draw_edges(s->current_picture.f.data[0], s->linesize,
1379                           s->h_edge_pos, s->v_edge_pos,
1380                           EDGE_WIDTH, EDGE_WIDTH,
1381                           EDGE_TOP | EDGE_BOTTOM);
1382         s->dsp.draw_edges(s->current_picture.f.data[1], s->uvlinesize,
1383                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1384                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1385                           EDGE_TOP | EDGE_BOTTOM);
1386         s->dsp.draw_edges(s->current_picture.f.data[2], s->uvlinesize,
1387                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1388                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1389                           EDGE_TOP | EDGE_BOTTOM);
1390     }
1391
1392     emms_c();
1393
1394     s->last_pict_type                 = s->pict_type;
1395     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1396     if (s->pict_type != AV_PICTURE_TYPE_B) {
1397         s->last_non_b_pict_type = s->pict_type;
1398     }
1399 #if 0
1400     /* copy back current_picture variables */
1401     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1402         if (s->picture[i].f.data[0] == s->current_picture.f.data[0]) {
1403             s->picture[i] = s->current_picture;
1404             break;
1405         }
1406     }
1407     assert(i < MAX_PICTURE_COUNT);
1408 #endif
1409
1410     if (s->encoding) {
1411         /* release non-reference frames */
1412         for (i = 0; i < s->picture_count; i++) {
1413             if (s->picture[i].f.data[0] && !s->picture[i].f.reference
1414                 /* && s->picture[i].type != FF_BUFFER_TYPE_SHARED */) {
1415                 free_frame_buffer(s, &s->picture[i]);
1416             }
1417         }
1418     }
1419     // clear copies, to avoid confusion
1420 #if 0
1421     memset(&s->last_picture,    0, sizeof(Picture));
1422     memset(&s->next_picture,    0, sizeof(Picture));
1423     memset(&s->current_picture, 0, sizeof(Picture));
1424 #endif
1425     s->avctx->coded_frame = &s->current_picture_ptr->f;
1426
1427     if (s->codec_id != CODEC_ID_H264 && s->current_picture.f.reference) {
1428         ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 0);
1429     }
1430 }
1431
1432 /**
1433  * Draw a line between (sx, sy) and (ex, ey).
1434  * @param w width of the image
1435  * @param h height of the image
1436  * @param stride stride/linesize of the image
1437  * @param color color of the line
1438  */
1439 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey,
1440                       int w, int h, int stride, int color)
1441 {
1442     int x, y, fr, f;
1443
1444     sx = av_clip(sx, 0, w - 1);
1445     sy = av_clip(sy, 0, h - 1);
1446     ex = av_clip(ex, 0, w - 1);
1447     ey = av_clip(ey, 0, h - 1);
1448
1449     buf[sy * stride + sx] += color;
1450
1451     if (FFABS(ex - sx) > FFABS(ey - sy)) {
1452         if (sx > ex) {
1453             FFSWAP(int, sx, ex);
1454             FFSWAP(int, sy, ey);
1455         }
1456         buf += sx + sy * stride;
1457         ex  -= sx;
1458         f    = ((ey - sy) << 16) / ex;
1459         for (x = 0; x <= ex; x++) {
1460             y  = (x * f) >> 16;
1461             fr = (x * f) & 0xFFFF;
1462             buf[y * stride + x]       += (color * (0x10000 - fr)) >> 16;
1463             buf[(y + 1) * stride + x] += (color *            fr ) >> 16;
1464         }
1465     } else {
1466         if (sy > ey) {
1467             FFSWAP(int, sx, ex);
1468             FFSWAP(int, sy, ey);
1469         }
1470         buf += sx + sy * stride;
1471         ey  -= sy;
1472         if (ey)
1473             f  = ((ex - sx) << 16) / ey;
1474         else
1475             f = 0;
1476         for (y = 0; y <= ey; y++) {
1477             x  = (y * f) >> 16;
1478             fr = (y * f) & 0xFFFF;
1479             buf[y * stride + x]     += (color * (0x10000 - fr)) >> 16;
1480             buf[y * stride + x + 1] += (color *            fr ) >> 16;
1481         }
1482     }
1483 }
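
     /*
      * A note on the fixed-point stepping above (illustration only): f holds
      * the minor-axis increment per major-axis pixel in 16.16 fixed point,
      * and each step splits color between the two neighbouring rows or
      * columns according to the fractional part, i.e. a cheap anti-aliased
      * DDA. For example, with color = 100 and a slope of 1/4, f = 0x4000;
      * at x = 2 we get x * f = 0x8000, so rows y and y + 1 each receive
      * (100 * 0x8000) >> 16 = 50.
      */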
1484
1485 /**
1486  * Draw an arrow from (ex, ey) -> (sx, sy).
1487  * @param w width of the image
1488  * @param h height of the image
1489  * @param stride stride/linesize of the image
1490  * @param color color of the arrow
1491  */
1492 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex,
1493                        int ey, int w, int h, int stride, int color)
1494 {
1495     int dx,dy;
1496
1497     sx = av_clip(sx, -100, w + 100);
1498     sy = av_clip(sy, -100, h + 100);
1499     ex = av_clip(ex, -100, w + 100);
1500     ey = av_clip(ey, -100, h + 100);
1501
1502     dx = ex - sx;
1503     dy = ey - sy;
1504
1505     if (dx * dx + dy * dy > 3 * 3) {
1506         int rx =  dx + dy;
1507         int ry = -dx + dy;
1508         int length = ff_sqrt((rx * rx + ry * ry) << 8);
1509
1510         // FIXME subpixel accuracy
1511         rx = ROUNDED_DIV(rx * 3 << 4, length);
1512         ry = ROUNDED_DIV(ry * 3 << 4, length);
1513
1514         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1515         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1516     }
1517     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1518 }
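
     /*
      * Arrow-head geometry above, for reference: (rx, ry) = (dx + dy, dy - dx)
      * is the shaft direction rotated by 45 degrees (scaled by sqrt(2)), and
      * (-ry, rx) is the opposite 45-degree rotation. length is computed with
      * 4 fractional bits (the << 8 inside ff_sqrt), so ROUNDED_DIV(rx * 3 << 4,
      * length) normalizes each barb to roughly 3 pixels. Both barbs start at
      * (sx, sy), which is why the arrow is documented as pointing from
      * (ex, ey) to (sx, sy).
      */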
1519
1520 /**
1521  * Print debugging info for the given picture.
1522  */
1523 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict)
1524 {
1525     if (s->avctx->hwaccel || !pict || !pict->mb_type)
1526         return;
1527
1528     if (s->avctx->debug & (FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)) {
1529         int x,y;
1530
1531         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1532         switch (pict->pict_type) {
1533         case AV_PICTURE_TYPE_I:
1534             av_log(s->avctx,AV_LOG_DEBUG,"I\n");
1535             break;
1536         case AV_PICTURE_TYPE_P:
1537             av_log(s->avctx,AV_LOG_DEBUG,"P\n");
1538             break;
1539         case AV_PICTURE_TYPE_B:
1540             av_log(s->avctx,AV_LOG_DEBUG,"B\n");
1541             break;
1542         case AV_PICTURE_TYPE_S:
1543             av_log(s->avctx,AV_LOG_DEBUG,"S\n");
1544             break;
1545         case AV_PICTURE_TYPE_SI:
1546             av_log(s->avctx,AV_LOG_DEBUG,"SI\n");
1547             break;
1548         case AV_PICTURE_TYPE_SP:
1549             av_log(s->avctx,AV_LOG_DEBUG,"SP\n");
1550             break;
1551         }
1552         for (y = 0; y < s->mb_height; y++) {
1553             for (x = 0; x < s->mb_width; x++) {
1554                 if (s->avctx->debug & FF_DEBUG_SKIP) {
1555                     int count = s->mbskip_table[x + y * s->mb_stride];
1556                     if (count > 9)
1557                         count = 9;
1558                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1559                 }
1560                 if (s->avctx->debug & FF_DEBUG_QP) {
1561                     av_log(s->avctx, AV_LOG_DEBUG, "%2d",
1562                            pict->qscale_table[x + y * s->mb_stride]);
1563                 }
1564                 if (s->avctx->debug & FF_DEBUG_MB_TYPE) {
1565                     int mb_type = pict->mb_type[x + y * s->mb_stride];
1566                     // Type & MV direction
1567                     if (IS_PCM(mb_type))
1568                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1569                     else if (IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1570                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1571                     else if (IS_INTRA4x4(mb_type))
1572                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1573                     else if (IS_INTRA16x16(mb_type))
1574                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1575                     else if (IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1576                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1577                     else if (IS_DIRECT(mb_type))
1578                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1579                     else if (IS_GMC(mb_type) && IS_SKIP(mb_type))
1580                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1581                     else if (IS_GMC(mb_type))
1582                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1583                     else if (IS_SKIP(mb_type))
1584                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1585                     else if (!USES_LIST(mb_type, 1))
1586                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1587                     else if (!USES_LIST(mb_type, 0))
1588                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1589                     else {
1590                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1591                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1592                     }
1593
1594                     // segmentation
1595                     if (IS_8X8(mb_type))
1596                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1597                     else if (IS_16X8(mb_type))
1598                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1599                     else if (IS_8X16(mb_type))
1600                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1601                     else if (IS_INTRA(mb_type) || IS_16X16(mb_type))
1602                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1603                     else
1604                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1605
1606
1607                     if (IS_INTERLACED(mb_type))
1608                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1609                     else
1610                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1611                 }
1612                 // av_log(s->avctx, AV_LOG_DEBUG, " ");
1613             }
1614             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1615         }
1616     }
1617
1618     if ((s->avctx->debug & (FF_DEBUG_VIS_QP | FF_DEBUG_VIS_MB_TYPE)) ||
1619         (s->avctx->debug_mv)) {
1620         const int shift = 1 + s->quarter_sample;
1621         int mb_y;
1622         uint8_t *ptr;
1623         int i;
1624         int h_chroma_shift, v_chroma_shift, block_height;
1625         const int width          = s->avctx->width;
1626         const int height         = s->avctx->height;
1627         const int mv_sample_log2 = 4 - pict->motion_subsample_log2;
1628         const int mv_stride      = (s->mb_width << mv_sample_log2) +
1629                                    (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1630         s->low_delay = 0; // needed to see the vectors without trashing the buffers
1631
1632         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,
1633                                       &h_chroma_shift, &v_chroma_shift);
1634         for (i = 0; i < 3; i++) {
1635             memcpy(s->visualization_buffer[i], pict->data[i],
1636                    (i == 0) ? pict->linesize[i] * height:
1637                               pict->linesize[i] * height >> v_chroma_shift);
1638             pict->data[i] = s->visualization_buffer[i];
1639         }
1640         pict->type   = FF_BUFFER_TYPE_COPY;
1641         ptr          = pict->data[0];
1642         block_height = 16 >> v_chroma_shift;
1643
1644         for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1645             int mb_x;
1646             for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1647                 const int mb_index = mb_x + mb_y * s->mb_stride;
1648                 if ((s->avctx->debug_mv) && pict->motion_val) {
1649                     int type;
1650                     for (type = 0; type < 3; type++) {
1651                         int direction = 0;
1652                         switch (type) {
1653                         case 0:
1654                             if ((!(s->avctx->debug_mv & FF_DEBUG_VIS_MV_P_FOR)) ||
1655                                 (pict->pict_type != AV_PICTURE_TYPE_P))
1656                                 continue;
1657                             direction = 0;
1658                             break;
1659                         case 1:
1660                             if ((!(s->avctx->debug_mv & FF_DEBUG_VIS_MV_B_FOR)) ||
1661                                 (pict->pict_type != AV_PICTURE_TYPE_B))
1662                                 continue;
1663                             direction = 0;
1664                             break;
1665                         case 2:
1666                             if ((!(s->avctx->debug_mv & FF_DEBUG_VIS_MV_B_BACK)) ||
1667                                 (pict->pict_type != AV_PICTURE_TYPE_B))
1668                                 continue;
1669                             direction = 1;
1670                             break;
1671                         }
1672                         if (!USES_LIST(pict->mb_type[mb_index], direction))
1673                             continue;
1674
1675                         if (IS_8X8(pict->mb_type[mb_index])) {
1676                             int i;
1677                             for (i = 0; i < 4; i++) {
1678                                 int sx = mb_x * 16 + 4 + 8 * (i & 1);
1679                                 int sy = mb_y * 16 + 4 + 8 * (i >> 1);
1680                                 int xy = (mb_x * 2 + (i & 1) +
1681                                           (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
1682                                 int mx = (pict->motion_val[direction][xy][0] >> shift) + sx;
1683                                 int my = (pict->motion_val[direction][xy][1] >> shift) + sy;
1684                                 draw_arrow(ptr, sx, sy, mx, my, width,
1685                                            height, s->linesize, 100);
1686                             }
1687                         } else if (IS_16X8(pict->mb_type[mb_index])) {
1688                             int i;
1689                             for (i = 0; i < 2; i++) {
1690                                 int sx = mb_x * 16 + 8;
1691                                 int sy = mb_y * 16 + 4 + 8 * i;
1692                                 int xy = (mb_x * 2 + (mb_y * 2 + i) * mv_stride) << (mv_sample_log2 - 1);
1693                                 int mx = (pict->motion_val[direction][xy][0] >> shift);
1694                                 int my = (pict->motion_val[direction][xy][1] >> shift);
1695
1696                                 if (IS_INTERLACED(pict->mb_type[mb_index]))
1697                                     my *= 2;
1698
1699                                 draw_arrow(ptr, sx, sy, mx + sx, my + sy, width,
1700                                            height, s->linesize, 100);
1701                             }
1702                         } else if (IS_8X16(pict->mb_type[mb_index])) {
1703                             int i;
1704                             for (i = 0; i < 2; i++) {
1705                                 int sx = mb_x * 16 + 4 + 8 * i;
1706                                 int sy = mb_y * 16 + 8;
1707                                 int xy = (mb_x * 2 + i + mb_y * 2 * mv_stride) << (mv_sample_log2 - 1);
1708                                 int mx = pict->motion_val[direction][xy][0] >> shift;
1709                                 int my = pict->motion_val[direction][xy][1] >> shift;
1710
1711                                 if (IS_INTERLACED(pict->mb_type[mb_index]))
1712                                     my *= 2;
1713
1714                                 draw_arrow(ptr, sx, sy, mx + sx, my + sy, width,
1715                                            height, s->linesize, 100);
1716                             }
1717                         } else {
1718                               int sx = mb_x * 16 + 8;
1719                               int sy = mb_y * 16 + 8;
1720                               int xy = (mb_x + mb_y * mv_stride) << mv_sample_log2;
1721                               int mx = (pict->motion_val[direction][xy][0] >> shift) + sx;
1722                               int my = (pict->motion_val[direction][xy][1] >> shift) + sy;
1723                               draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1724                         }
1725                     }
1726                 }
1727                 if ((s->avctx->debug & FF_DEBUG_VIS_QP) && pict->motion_val) {
1728                     uint64_t c = (pict->qscale_table[mb_index] * 128 / 31) *
1729                                  0x0101010101010101ULL;
1730                     int y;
1731                     for (y = 0; y < block_height; y++) {
1732                         *(uint64_t *)(pict->data[1] + 8 * mb_x +
1733                                       (block_height * mb_y + y) *
1734                                       pict->linesize[1]) = c;
1735                         *(uint64_t *)(pict->data[2] + 8 * mb_x +
1736                                       (block_height * mb_y + y) *
1737                                       pict->linesize[2]) = c;
1738                     }
1739                 }
1740                 if ((s->avctx->debug & FF_DEBUG_VIS_MB_TYPE) &&
1741                     pict->motion_val) {
1742                     int mb_type = pict->mb_type[mb_index];
1743                     uint64_t u,v;
1744                     int y;
1745 #define COLOR(theta, r) \
1746     u = (int)(128 + r * cos(theta * 3.141592 / 180)); \
1747     v = (int)(128 + r * sin(theta * 3.141592 / 180));
1748
1749
1750                     u = v = 128;
1751                     if (IS_PCM(mb_type)) {
1752                         COLOR(120, 48)
1753                     } else if ((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) ||
1754                                IS_INTRA16x16(mb_type)) {
1755                         COLOR(30, 48)
1756                     } else if (IS_INTRA4x4(mb_type)) {
1757                         COLOR(90, 48)
1758                     } else if (IS_DIRECT(mb_type) && IS_SKIP(mb_type)) {
1759                         // COLOR(120, 48)
1760                     } else if (IS_DIRECT(mb_type)) {
1761                         COLOR(150, 48)
1762                     } else if (IS_GMC(mb_type) && IS_SKIP(mb_type)) {
1763                         COLOR(170, 48)
1764                     } else if (IS_GMC(mb_type)) {
1765                         COLOR(190, 48)
1766                     } else if (IS_SKIP(mb_type)) {
1767                         // COLOR(180, 48)
1768                     } else if (!USES_LIST(mb_type, 1)) {
1769                         COLOR(240, 48)
1770                     } else if (!USES_LIST(mb_type, 0)) {
1771                         COLOR(0, 48)
1772                     } else {
1773                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1774                         COLOR(300,48)
1775                     }
1776
1777                     u *= 0x0101010101010101ULL;
1778                     v *= 0x0101010101010101ULL;
1779                     for (y = 0; y < block_height; y++) {
1780                         *(uint64_t *)(pict->data[1] + 8 * mb_x +
1781                                       (block_height * mb_y + y) * pict->linesize[1]) = u;
1782                         *(uint64_t *)(pict->data[2] + 8 * mb_x +
1783                                       (block_height * mb_y + y) * pict->linesize[2]) = v;
1784                     }
1785
1786                     // segmentation
1787                     if (IS_8X8(mb_type) || IS_16X8(mb_type)) {
1788                         *(uint64_t *)(pict->data[0] + 16 * mb_x + 0 +
1789                                       (16 * mb_y + 8) * pict->linesize[0]) ^= 0x8080808080808080ULL;
1790                         *(uint64_t *)(pict->data[0] + 16 * mb_x + 8 +
1791                                       (16 * mb_y + 8) * pict->linesize[0]) ^= 0x8080808080808080ULL;
1792                     }
1793                     if (IS_8X8(mb_type) || IS_8X16(mb_type)) {
1794                         for (y = 0; y < 16; y++)
1795                             pict->data[0][16 * mb_x + 8 + (16 * mb_y + y) *
1796                                           pict->linesize[0]] ^= 0x80;
1797                     }
1798                     if (IS_8X8(mb_type) && mv_sample_log2 >= 2) {
1799                         int dm = 1 << (mv_sample_log2 - 2);
1800                         for (i = 0; i < 4; i++) {
1801                             int sx = mb_x * 16 + 8 * (i & 1);
1802                             int sy = mb_y * 16 + 8 * (i >> 1);
1803                             int xy = (mb_x * 2 + (i & 1) +
1804                                      (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
1805                             // FIXME bidir
1806                             int32_t *mv = (int32_t *) &pict->motion_val[0][xy];
1807                             if (mv[0] != mv[dm] ||
1808                                 mv[dm * mv_stride] != mv[dm * (mv_stride + 1)])
1809                                 for (y = 0; y < 8; y++)
1810                                     pict->data[0][sx + 4 + (sy + y) * pict->linesize[0]] ^= 0x80;
1811                             if (mv[0] != mv[dm * mv_stride] || mv[dm] != mv[dm * (mv_stride + 1)])
1812                                 *(uint64_t *)(pict->data[0] + sx + (sy + 4) *
1813                                               pict->linesize[0]) ^= 0x8080808080808080ULL;
1814                         }
1815                     }
1816
1817                     if (IS_INTERLACED(mb_type) &&
1818                         s->codec_id == CODEC_ID_H264) {
1819                         // hmm
1820                     }
1821                 }
1822                 s->mbskip_table[mb_index] = 0;
1823             }
1824         }
1825     }
1826 }
1827
1828 /**
1830  * Find the lowest MB row referenced in the MVs.
1830  */
1831 int ff_MPV_lowest_referenced_row(MpegEncContext *s, int dir)
1832 {
1833     int my_max = INT_MIN, my_min = INT_MAX, qpel_shift = !s->quarter_sample;
1834     int my, off, i, mvs;
1835
1836     if (s->picture_structure != PICT_FRAME) goto unhandled;
1837
1838     switch (s->mv_type) {
1839         case MV_TYPE_16X16:
1840             mvs = 1;
1841             break;
1842         case MV_TYPE_16X8:
1843             mvs = 2;
1844             break;
1845         case MV_TYPE_8X8:
1846             mvs = 4;
1847             break;
1848         default:
1849             goto unhandled;
1850     }
1851
1852     for (i = 0; i < mvs; i++) {
1853         my = s->mv[dir][i][1] << qpel_shift;
1854         my_max = FFMAX(my_max, my);
1855         my_min = FFMIN(my_min, my);
1856     }
1857
1858     off = (FFMAX(-my_min, my_max) + 63) >> 6;
1859
1860     return FFMIN(FFMAX(s->mb_y + off, 0), s->mb_height-1);
1861 unhandled:
1862     return s->mb_height-1;
1863 }
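
     /*
      * Rough arithmetic of the function above: after the shift the vectors
      * are in quarter-pel units, so 64 units equal one 16-pixel MB row, and
      * the + 63 rounds upwards. E.g. a vector of +100 quarter-pels (25 luma
      * pixels) gives off = (100 + 63) >> 6 = 2, so rows up to s->mb_y + 2 of
      * the reference frame must already be decoded before this MB can be
      * reconstructed.
      */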
1864
1865 /* put block[] into dest[] */
1866 static inline void put_dct(MpegEncContext *s,
1867                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1868 {
1869     s->dct_unquantize_intra(s, block, i, qscale);
1870     s->dsp.idct_put (dest, line_size, block);
1871 }
1872
1873 /* add block[] to dest[] */
1874 static inline void add_dct(MpegEncContext *s,
1875                            DCTELEM *block, int i, uint8_t *dest, int line_size)
1876 {
1877     if (s->block_last_index[i] >= 0) {
1878         s->dsp.idct_add (dest, line_size, block);
1879     }
1880 }
1881
1882 static inline void add_dequant_dct(MpegEncContext *s,
1883                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1884 {
1885     if (s->block_last_index[i] >= 0) {
1886         s->dct_unquantize_inter(s, block, i, qscale);
1887
1888         s->dsp.idct_add (dest, line_size, block);
1889     }
1890 }
1891
1892 /**
1893  * Clean dc, ac, coded_block for the current non-intra MB.
1894  */
1895 void ff_clean_intra_table_entries(MpegEncContext *s)
1896 {
1897     int wrap = s->b8_stride;
1898     int xy = s->block_index[0];
1899
1900     s->dc_val[0][xy           ] =
1901     s->dc_val[0][xy + 1       ] =
1902     s->dc_val[0][xy     + wrap] =
1903     s->dc_val[0][xy + 1 + wrap] = 1024;
1904     /* ac pred */
1905     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
1906     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
1907     if (s->msmpeg4_version>=3) {
1908         s->coded_block[xy           ] =
1909         s->coded_block[xy + 1       ] =
1910         s->coded_block[xy     + wrap] =
1911         s->coded_block[xy + 1 + wrap] = 0;
1912     }
1913     /* chroma */
1914     wrap = s->mb_stride;
1915     xy = s->mb_x + s->mb_y * wrap;
1916     s->dc_val[1][xy] =
1917     s->dc_val[2][xy] = 1024;
1918     /* ac pred */
1919     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
1920     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
1921
1922     s->mbintra_table[xy]= 0;
1923 }
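
     /*
      * Note on the constants above: 1024 is the DC-predictor reset value
      * specified for the H.263/MPEG-4 family, and each block keeps 16 stored
      * AC prediction values, so each 32-element memset clears a horizontally
      * adjacent pair of the macroblock's four luma blocks in one go.
      */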
1924
1925 /* generic function called after a macroblock has been parsed by the
1926    decoder or after it has been encoded by the encoder.
1927
1928    Important variables used:
1929    s->mb_intra : true if intra macroblock
1930    s->mv_dir   : motion vector direction
1931    s->mv_type  : motion vector type
1932    s->mv       : motion vector
1933    s->interlaced_dct : true if interlaced dct used (mpeg2)
1934  */
1935 static av_always_inline
1936 void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
1937                             int is_mpeg12)
1938 {
1939     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
1940     if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){
1941         ff_xvmc_decode_mb(s);//xvmc uses pblocks
1942         return;
1943     }
1944
1945     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
1946        /* save DCT coefficients */
1947        int i,j;
1948        DCTELEM *dct = &s->current_picture.f.dct_coeff[mb_xy * 64 * 6];
1949        av_log(s->avctx, AV_LOG_DEBUG, "DCT coeffs of MB at %dx%d:\n", s->mb_x, s->mb_y);
1950        for(i=0; i<6; i++){
1951            for(j=0; j<64; j++){
1952                *dct++ = block[i][s->dsp.idct_permutation[j]];
1953                av_log(s->avctx, AV_LOG_DEBUG, "%5d", dct[-1]);
1954            }
1955            av_log(s->avctx, AV_LOG_DEBUG, "\n");
1956        }
1957     }
1958
1959     s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1960
1961     /* update DC predictors for P macroblocks */
1962     if (!s->mb_intra) {
1963         if (!is_mpeg12 && (s->h263_pred || s->h263_aic)) {
1964             if(s->mbintra_table[mb_xy])
1965                 ff_clean_intra_table_entries(s);
1966         } else {
1967             s->last_dc[0] =
1968             s->last_dc[1] =
1969             s->last_dc[2] = 128 << s->intra_dc_precision;
1970         }
1971     }
1972     else if (!is_mpeg12 && (s->h263_pred || s->h263_aic))
1973         s->mbintra_table[mb_xy]=1;
1974
1975     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==AV_PICTURE_TYPE_B) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
1976         uint8_t *dest_y, *dest_cb, *dest_cr;
1977         int dct_linesize, dct_offset;
1978         op_pixels_func (*op_pix)[4];
1979         qpel_mc_func (*op_qpix)[16];
1980         const int linesize   = s->current_picture.f.linesize[0]; //not s->linesize as this would be wrong for field pics
1981         const int uvlinesize = s->current_picture.f.linesize[1];
1982         const int readable= s->pict_type != AV_PICTURE_TYPE_B || s->encoding || s->avctx->draw_horiz_band;
1983         const int block_size = 8;
1984
1985         /* avoid copy if macroblock skipped in last frame too */
1986         /* skip only during decoding as we might trash the buffers during encoding a bit */
1987         if(!s->encoding){
1988             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
1989
1990             if (s->mb_skipped) {
1991                 s->mb_skipped= 0;
1992                 assert(s->pict_type!=AV_PICTURE_TYPE_I);
1993                 *mbskip_ptr = 1;
1994             } else if(!s->current_picture.f.reference) {
1995                 *mbskip_ptr = 1;
1996             } else{
1997                 *mbskip_ptr = 0; /* not skipped */
1998             }
1999         }
2000
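             /* With field (interlaced) DCT, the two vertically adjacent luma
              * blocks of a macroblock hold alternating frame lines, so the
              * line stride is doubled and the lower block starts one line
              * down instead of eight lines down. */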
2001         dct_linesize = linesize << s->interlaced_dct;
2002         dct_offset   = s->interlaced_dct ? linesize : linesize * block_size;
2003
2004         if(readable){
2005             dest_y=  s->dest[0];
2006             dest_cb= s->dest[1];
2007             dest_cr= s->dest[2];
2008         }else{
2009             dest_y = s->b_scratchpad;
2010             dest_cb= s->b_scratchpad+16*linesize;
2011             dest_cr= s->b_scratchpad+32*linesize;
2012         }
2013
2014         if (!s->mb_intra) {
2015             /* motion handling */
2016             /* decoding or more than one mb_type (MC was already done otherwise) */
2017             if(!s->encoding){
2018
2019                 if(HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) {
2020                     if (s->mv_dir & MV_DIR_FORWARD) {
2021                         ff_thread_await_progress(&s->last_picture_ptr->f,
2022                                                  ff_MPV_lowest_referenced_row(s, 0),
2023                                                  0);
2024                     }
2025                     if (s->mv_dir & MV_DIR_BACKWARD) {
2026                         ff_thread_await_progress(&s->next_picture_ptr->f,
2027                                                  ff_MPV_lowest_referenced_row(s, 1),
2028                                                  0);
2029                     }
2030                 }
2031
2032                 op_qpix= s->me.qpel_put;
2033                 if ((!s->no_rounding) || s->pict_type==AV_PICTURE_TYPE_B){
2034                     op_pix = s->dsp.put_pixels_tab;
2035                 }else{
2036                     op_pix = s->dsp.put_no_rnd_pixels_tab;
2037                 }
2038                 if (s->mv_dir & MV_DIR_FORWARD) {
2039                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data, op_pix, op_qpix);
2040                     op_pix = s->dsp.avg_pixels_tab;
2041                     op_qpix= s->me.qpel_avg;
2042                 }
2043                 if (s->mv_dir & MV_DIR_BACKWARD) {
2044                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data, op_pix, op_qpix);
2045                 }
2046             }
2047
2048             /* skip dequant / idct if we are really late ;) */
2049             if(s->avctx->skip_idct){
2050                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B)
2051                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != AV_PICTURE_TYPE_I)
2052                    || s->avctx->skip_idct >= AVDISCARD_ALL)
2053                     goto skip_idct;
2054             }
2055
2056             /* add dct residue */
2057             if(s->encoding || !(   s->msmpeg4_version || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
2058                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
2059                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
2060                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
2061                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
2062                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
2063
2064                 if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2065                     if (s->chroma_y_shift){
2066                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
2067                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
2068                     }else{
2069                         dct_linesize >>= 1;
2070                         dct_offset >>=1;
2071                         add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
2072                         add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
2073                         add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
2074                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
2075                     }
2076                 }
2077             } else if(is_mpeg12 || (s->codec_id != CODEC_ID_WMV2)){
2078                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
2079                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
2080                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
2081                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
2082
2083                 if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2084                     if(s->chroma_y_shift){//Chroma420
2085                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
2086                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
2087                     }else{
2088                         //chroma422
2089                         dct_linesize = uvlinesize << s->interlaced_dct;
2090                         dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize * 8;
2091
2092                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
2093                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
2094                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
2095                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
2096                         if(!s->chroma_x_shift){//Chroma444
2097                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
2098                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
2099                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
2100                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
2101                         }
2102                     }
2103                 }//fi gray
2104             }
2105             else if (CONFIG_WMV2_DECODER || CONFIG_WMV2_ENCODER) {
2106                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
2107             }
2108         } else {
2109             /* dct only in intra block */
2110             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
2111                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
2112                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
2113                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
2114                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
2115
2116                 if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2117                     if(s->chroma_y_shift){
2118                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
2119                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
2120                     }else{
2121                         dct_offset >>=1;
2122                         dct_linesize >>=1;
2123                         put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
2124                         put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
2125                         put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
2126                         put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
2127                     }
2128                 }
2129             }else{
2130                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
2131                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
2132                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
2133                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
2134
2135                 if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2136                     if(s->chroma_y_shift){
2137                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
2138                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
2139                     }else{
2140
2141                         dct_linesize = uvlinesize << s->interlaced_dct;
2142                         dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize * 8;
2143
2144                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
2145                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
2146                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
2147                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
2148                         if(!s->chroma_x_shift){//Chroma444
2149                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
2150                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
2151                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
2152                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
2153                         }
2154                     }
2155                 }//gray
2156             }
2157         }
2158 skip_idct:
2159         if(!readable){
2160             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
2161             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
2162             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
2163         }
2164     }
2165 }
2166
2167 void ff_MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
2168 #if !CONFIG_SMALL
2169     if(s->out_format == FMT_MPEG1) {
2170         MPV_decode_mb_internal(s, block, 1);
2171     } else
2172 #endif
2173         MPV_decode_mb_internal(s, block, 0);
2174 }
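
     /*
      * The wrapper above is a poor man's template: is_mpeg12 is a compile-time
      * constant and MPV_decode_mb_internal() is av_always_inline, so the
      * compiler can drop the branches that do not apply and emit a specialised
      * MPEG-1/2 path; under CONFIG_SMALL only the generic version is built to
      * save space.
      */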
2175
2176 /**
2177  * @param h the normal height; it will be reduced automatically if needed for the last row
2178  */
2179 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2180     const int field_pic= s->picture_structure != PICT_FRAME;
2181     if(field_pic){
2182         h <<= 1;
2183         y <<= 1;
2184     }
2185
2186     if (!s->avctx->hwaccel
2187        && !(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2188        && s->unrestricted_mv
2189        && s->current_picture.f.reference
2190        && !s->intra_only
2191        && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
2192         int sides = 0, edge_h;
2193         int hshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_w;
2194         int vshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_h;
2195         if (y==0) sides |= EDGE_TOP;
2196         if (y + h >= s->v_edge_pos) sides |= EDGE_BOTTOM;
2197
2198         edge_h= FFMIN(h, s->v_edge_pos - y);
2199
2200         s->dsp.draw_edges(s->current_picture_ptr->f.data[0] +  y         *s->linesize,
2201                           s->linesize,           s->h_edge_pos,         edge_h,
2202                           EDGE_WIDTH,            EDGE_WIDTH,            sides);
2203         s->dsp.draw_edges(s->current_picture_ptr->f.data[1] + (y>>vshift)*s->uvlinesize,
2204                           s->uvlinesize,         s->h_edge_pos>>hshift, edge_h>>vshift,
2205                           EDGE_WIDTH>>hshift,    EDGE_WIDTH>>vshift,    sides);
2206         s->dsp.draw_edges(s->current_picture_ptr->f.data[2] + (y>>vshift)*s->uvlinesize,
2207                           s->uvlinesize,         s->h_edge_pos>>hshift, edge_h>>vshift,
2208                           EDGE_WIDTH>>hshift,    EDGE_WIDTH>>vshift,    sides);
2209     }
2210
2211     h= FFMIN(h, s->avctx->height - y);
2212
2213     if(field_pic && s->first_field && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
2214
2215     if (s->avctx->draw_horiz_band) {
2216         AVFrame *src;
2217         int offset[AV_NUM_DATA_POINTERS];
2218         int i;
2219
2220         if(s->pict_type==AV_PICTURE_TYPE_B || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
2221             src = &s->current_picture_ptr->f;
2222         else if(s->last_picture_ptr)
2223             src = &s->last_picture_ptr->f;
2224         else
2225             return;
2226
2227         if(s->pict_type==AV_PICTURE_TYPE_B && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2228             for (i = 0; i < AV_NUM_DATA_POINTERS; i++)
2229                 offset[i] = 0;
2230         }else{
2231             offset[0]= y * s->linesize;
2232             offset[1]=
2233             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
2234             for (i = 3; i < AV_NUM_DATA_POINTERS; i++)
2235                 offset[i] = 0;
2236         }
2237
2238         emms_c();
2239
2240         s->avctx->draw_horiz_band(s->avctx, src, offset,
2241                                   y, s->picture_structure, h);
2242     }
2243 }
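
     /*
      * Minimal sketch of a user-supplied draw_horiz_band callback consuming
      * the band delivered above, kept out of the build on purpose. It assumes
      * the common case where offset[] points at the start of the band inside
      * src, and that avctx->opaque was set by the caller to a frame-sized
      * luma buffer; both example_draw_horiz_band and that buffer are
      * illustrative, not part of this file.
      */
     #if 0
     static void example_draw_horiz_band(AVCodecContext *avctx, const AVFrame *src,
                                          int offset[AV_NUM_DATA_POINTERS],
                                          int y, int type, int height)
     {
         uint8_t *dst = avctx->opaque; /* hypothetical user buffer, width * height bytes */
         int row;

         for (row = 0; row < height && y + row < avctx->height; row++)
             memcpy(dst + (y + row) * avctx->width,
                    src->data[0] + offset[0] + row * src->linesize[0],
                    avctx->width);
     }
     #endif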
2244
2245 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
2246     const int linesize   = s->current_picture.f.linesize[0]; //not s->linesize as this would be wrong for field pics
2247     const int uvlinesize = s->current_picture.f.linesize[1];
2248     const int mb_size= 4;
2249
2250     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
2251     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
2252     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
2253     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
2254     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2255     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2256     //block_index is not used by mpeg2, so it is not affected by chroma_format
2257
2258     s->dest[0] = s->current_picture.f.data[0] + ((s->mb_x - 1) <<  mb_size);
2259     s->dest[1] = s->current_picture.f.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2260     s->dest[2] = s->current_picture.f.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2261
2262     if(!(s->pict_type==AV_PICTURE_TYPE_B && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
2263     {
2264         if(s->picture_structure==PICT_FRAME){
2265             s->dest[0] += s->mb_y *   linesize << mb_size;
2266             s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2267             s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2268         }else{
2269             s->dest[0] += (s->mb_y>>1) *   linesize << mb_size;
2270             s->dest[1] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
2271             s->dest[2] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
2272             assert((s->mb_y&1) == (s->picture_structure == PICT_BOTTOM_FIELD));
2273         }
2274     }
2275 }
2276
2277 void ff_mpeg_flush(AVCodecContext *avctx){
2278     int i;
2279     MpegEncContext *s = avctx->priv_data;
2280
2281     if(s==NULL || s->picture==NULL)
2282         return;
2283
2284     for(i=0; i<s->picture_count; i++){
2285        if (s->picture[i].f.data[0] &&
2286            (s->picture[i].f.type == FF_BUFFER_TYPE_INTERNAL ||
2287             s->picture[i].f.type == FF_BUFFER_TYPE_USER))
2288         free_frame_buffer(s, &s->picture[i]);
2289     }
2290     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
2291
2292     s->mb_x= s->mb_y= 0;
2293
2294     s->parse_context.state= -1;
2295     s->parse_context.frame_start_found= 0;
2296     s->parse_context.overread= 0;
2297     s->parse_context.overread_index= 0;
2298     s->parse_context.index= 0;
2299     s->parse_context.last_index= 0;
2300     s->bitstream_buffer_size=0;
2301     s->pp_time=0;
2302 }
2303
2304 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
2305                                    DCTELEM *block, int n, int qscale)
2306 {
2307     int i, level, nCoeffs;
2308     const uint16_t *quant_matrix;
2309
2310     nCoeffs= s->block_last_index[n];
2311
2312     if (n < 4)
2313         block[0] = block[0] * s->y_dc_scale;
2314     else
2315         block[0] = block[0] * s->c_dc_scale;
2316     /* XXX: only mpeg1 */
2317     quant_matrix = s->intra_matrix;
2318     for(i=1;i<=nCoeffs;i++) {
2319         int j= s->intra_scantable.permutated[i];
2320         level = block[j];
2321         if (level) {
2322             if (level < 0) {
2323                 level = -level;
2324                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2325                 level = (level - 1) | 1;
2326                 level = -level;
2327             } else {
2328                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2329                 level = (level - 1) | 1;
2330             }
2331             block[j] = level;
2332         }
2333     }
2334 }
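
     /*
      * Worked example for the MPEG-1 intra path above (illustration only):
      * level = 3, qscale = 4, quant_matrix[j] = 16 gives
      * (3 * 4 * 16) >> 3 = 24, and the (level - 1) | 1 "oddification" turns
      * that into 23; MPEG-1 forces reconstructed coefficients to odd values
      * as its mismatch control. The DC coefficient bypasses the loop and is
      * only scaled by y_dc_scale/c_dc_scale.
      */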
2335
2336 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
2337                                    DCTELEM *block, int n, int qscale)
2338 {
2339     int i, level, nCoeffs;
2340     const uint16_t *quant_matrix;
2341
2342     nCoeffs= s->block_last_index[n];
2343
2344     quant_matrix = s->inter_matrix;
2345     for(i=0; i<=nCoeffs; i++) {
2346         int j= s->intra_scantable.permutated[i];
2347         level = block[j];
2348         if (level) {
2349             if (level < 0) {
2350                 level = -level;
2351                 level = (((level << 1) + 1) * qscale *
2352                          ((int) (quant_matrix[j]))) >> 4;
2353                 level = (level - 1) | 1;
2354                 level = -level;
2355             } else {
2356                 level = (((level << 1) + 1) * qscale *
2357                          ((int) (quant_matrix[j]))) >> 4;
2358                 level = (level - 1) | 1;
2359             }
2360             block[j] = level;
2361         }
2362     }
2363 }
2364
2365 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
2366                                    DCTELEM *block, int n, int qscale)
2367 {
2368     int i, level, nCoeffs;
2369     const uint16_t *quant_matrix;
2370
2371     if(s->alternate_scan) nCoeffs= 63;
2372     else nCoeffs= s->block_last_index[n];
2373
2374     if (n < 4)
2375         block[0] = block[0] * s->y_dc_scale;
2376     else
2377         block[0] = block[0] * s->c_dc_scale;
2378     quant_matrix = s->intra_matrix;
2379     for(i=1;i<=nCoeffs;i++) {
2380         int j= s->intra_scantable.permutated[i];
2381         level = block[j];
2382         if (level) {
2383             if (level < 0) {
2384                 level = -level;
2385                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2386                 level = -level;
2387             } else {
2388                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2389             }
2390             block[j] = level;
2391         }
2392     }
2393 }
2394
2395 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
2396                                    DCTELEM *block, int n, int qscale)
2397 {
2398     int i, level, nCoeffs;
2399     const uint16_t *quant_matrix;
2400     int sum=-1;
2401
2402     if(s->alternate_scan) nCoeffs= 63;
2403     else nCoeffs= s->block_last_index[n];
2404
2405     if (n < 4)
2406         block[0] = block[0] * s->y_dc_scale;
2407     else
2408         block[0] = block[0] * s->c_dc_scale;
2409     quant_matrix = s->intra_matrix;
2410     for(i=1;i<=nCoeffs;i++) {
2411         int j= s->intra_scantable.permutated[i];
2412         level = block[j];
2413         if (level) {
2414             if (level < 0) {
2415                 level = -level;
2416                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2417                 level = -level;
2418             } else {
2419                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2420             }
2421             block[j] = level;
2422             sum+=level;
2423         }
2424     }
2425     block[63]^=sum&1;
2426 }
2427
2428 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
2429                                    DCTELEM *block, int n, int qscale)
2430 {
2431     int i, level, nCoeffs;
2432     const uint16_t *quant_matrix;
2433     int sum=-1;
2434
2435     if(s->alternate_scan) nCoeffs= 63;
2436     else nCoeffs= s->block_last_index[n];
2437
2438     quant_matrix = s->inter_matrix;
2439     for(i=0; i<=nCoeffs; i++) {
2440         int j= s->intra_scantable.permutated[i];
2441         level = block[j];
2442         if (level) {
2443             if (level < 0) {
2444                 level = -level;
2445                 level = (((level << 1) + 1) * qscale *
2446                          ((int) (quant_matrix[j]))) >> 4;
2447                 level = -level;
2448             } else {
2449                 level = (((level << 1) + 1) * qscale *
2450                          ((int) (quant_matrix[j]))) >> 4;
2451             }
2452             block[j] = level;
2453             sum+=level;
2454         }
2455     }
2456     block[63]^=sum&1;
2457 }
2458
2459 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
2460                                   DCTELEM *block, int n, int qscale)
2461 {
2462     int i, level, qmul, qadd;
2463     int nCoeffs;
2464
2465     assert(s->block_last_index[n]>=0);
2466
2467     qmul = qscale << 1;
2468
2469     if (!s->h263_aic) {
2470         if (n < 4)
2471             block[0] = block[0] * s->y_dc_scale;
2472         else
2473             block[0] = block[0] * s->c_dc_scale;
2474         qadd = (qscale - 1) | 1;
2475     }else{
2476         qadd = 0;
2477     }
2478     if(s->ac_pred)
2479         nCoeffs=63;
2480     else
2481         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2482
2483     for(i=1; i<=nCoeffs; i++) {
2484         level = block[i];
2485         if (level) {
2486             if (level < 0) {
2487                 level = level * qmul - qadd;
2488             } else {
2489                 level = level * qmul + qadd;
2490             }
2491             block[i] = level;
2492         }
2493     }
2494 }
2495
2496 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
2497                                   DCTELEM *block, int n, int qscale)
2498 {
2499     int i, level, qmul, qadd;
2500     int nCoeffs;
2501
2502     assert(s->block_last_index[n]>=0);
2503
2504     qadd = (qscale - 1) | 1;
2505     qmul = qscale << 1;
2506
2507     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2508
2509     for(i=0; i<=nCoeffs; i++) {
2510         level = block[i];
2511         if (level) {
2512             if (level < 0) {
2513                 level = level * qmul - qadd;
2514             } else {
2515                 level = level * qmul + qadd;
2516             }
2517             block[i] = level;
2518         }
2519     }
2520 }
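
     /*
      * The qmul/qadd pair above implements the standard H.263 reconstruction
      * |REC| = QUANT * (2 * |LEVEL| + 1), minus 1 when QUANT is even:
      * qmul = 2 * qscale and qadd = (qscale - 1) | 1, i.e. qscale for odd
      * qscale and qscale - 1 for even. For example qscale = 6, level = 3
      * gives 3 * 12 + 5 = 41 = 6 * 7 - 1.
      */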
2521
2522 /**
2523  * Set qscale and update qscale-dependent variables.
2524  */
2525 void ff_set_qscale(MpegEncContext * s, int qscale)
2526 {
2527     if (qscale < 1)
2528         qscale = 1;
2529     else if (qscale > 31)
2530         qscale = 31;
2531
2532     s->qscale = qscale;
2533     s->chroma_qscale= s->chroma_qscale_table[qscale];
2534
2535     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
2536     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
2537 }
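
     /*
      * Note: qscale is clamped to the 5-bit quantiser range 1..31 used by the
      * MPEG-1/2/4 and H.263 family; the chroma quantiser and the DC scale
      * factors are then derived through codec-specific lookup tables.
      */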
2538
2539 void ff_MPV_report_decode_progress(MpegEncContext *s)
2540 {
2541     if (s->pict_type != AV_PICTURE_TYPE_B && !s->partitioned_frame && !s->error_occurred)
2542         ff_thread_report_progress(&s->current_picture_ptr->f, s->mb_y, 0);
2543 }