git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "mpegvideodata.h"
  44 #include "h261.h"
  45 #include "h263.h"
  46 #include "h263data.h"
  47 #include "mjpegenc_common.h"
  48 #include "mathops.h"
  49 #include "mpegutils.h"
  50 #include "mjpegenc.h"
  51 #include "msmpeg4.h"
  52 #include "pixblockdsp.h"
  53 #include "qpeldsp.h"
  54 #include "faandct.h"
  55 #include "thread.h"
  56 #include "aandcttab.h"
  57 #include "flv.h"
  58 #include "mpeg4video.h"
  59 #include "internal.h"
  60 #include "bytestream.h"
  61 #include "wmv2.h"
  62 #include "rv10.h"
  63 #include <limits.h>
  64 #include "sp5x.h"
  65
/* Number of fractional bits in the fixed-point quantizer bias values
 * (e.g. 3 << (QUANT_BIAS_SHIFT - 3) represents 3/8). */
#define QUANT_BIAS_SHIFT 8

/* Fixed-point shift used for the 16-bit multipliers stored in qmat16. */
#define QMAT_SHIFT_MMX 16
/* Fixed-point shift used for the int multipliers stored in qmat. */
#define QMAT_SHIFT 21

/* Forward declarations of file-local (static) helpers. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Default tables that mpv_encode_defaults() installs as s->me.mv_penalty
 * and s->fcode_tab. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* NULL-terminated option table holding only what FF_MPV_COMMON_OPTS
 * expands to (presumably the options shared by all mpegvideo encoders). */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  84
  85 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  86                        uint16_t (*qmat16)[2][64],
  87                        const uint16_t *quant_matrix,
  88                        int bias, int qmin, int qmax, int intra)
  89 {
  90     FDCTDSPContext *fdsp = &s->fdsp;
  91     int qscale;
  92     int shift = 0;
  93
  94     for (qscale = qmin; qscale <= qmax; qscale++) {
  95         int i;
  96         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  97 #if CONFIG_FAANDCT
  98             fdsp->fdct == ff_faandct            ||
  99 #endif /* CONFIG_FAANDCT */
 100             fdsp->fdct == ff_jpeg_fdct_islow_10) {
 101             for (i = 0; i < 64; i++) {
 102                 const int j = s->idsp.idct_permutation[i];
 103                 int64_t den = (int64_t) qscale * quant_matrix[j];
 104                 /* 16 <= qscale * quant_matrix[i] <= 7905
 105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 106                  *             19952 <=              x  <= 249205026
 107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 108                  *           3444240 >= (1 << 36) / (x) >= 275 */
 109
 110                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 111             }
 112         } else if (fdsp->fdct == ff_fdct_ifast) {
 113             for (i = 0; i < 64; i++) {
 114                 const int j = s->idsp.idct_permutation[i];
 115                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 116                 /* 16 <= qscale * quant_matrix[i] <= 7905
 117                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 118                  *             19952 <=              x  <= 249205026
 119                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 120                  *           3444240 >= (1 << 36) / (x) >= 275 */
 121
 122                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 123             }
 124         } else {
 125             for (i = 0; i < 64; i++) {
 126                 const int j = s->idsp.idct_permutation[i];
 127                 int64_t den = (int64_t) qscale * quant_matrix[j];
 128                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 129                  * Assume x = qscale * quant_matrix[i]
 130                  * So             16 <=              x  <= 7905
 131                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 132                  * so          32768 >= (1 << 19) / (x) >= 67 */
 133                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 134                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 135                 //                    (qscale * quant_matrix[i]);
 136                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 137
 138                 if (qmat16[qscale][0][i] == 0 ||
 139                     qmat16[qscale][0][i] == 128 * 256)
 140                     qmat16[qscale][0][i] = 128 * 256 - 1;
 141                 qmat16[qscale][1][i] =
 142                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 143                                 qmat16[qscale][0][i]);
 144             }
 145         }
 146
 147         for (i = intra; i < 64; i++) {
 148             int64_t max = 8191;
 149             if (fdsp->fdct == ff_fdct_ifast) {
 150                 max = (8191LL * ff_aanscales[i]) >> 14;
 151             }
 152             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 153                 shift++;
 154             }
 155         }
 156     }
 157     if (shift) {
 158         av_log(NULL, AV_LOG_INFO,
 159                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 160                QMAT_SHIFT - shift);
 161     }
 162 }
 163
 164 static inline void update_qscale(MpegEncContext *s)
 165 {
 166     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 167                 (FF_LAMBDA_SHIFT + 7);
 168     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 169
 170     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 171                  FF_LAMBDA_SHIFT;
 172 }
 173
 174 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 175 {
 176     int i;
 177
 178     if (matrix) {
 179         put_bits(pb, 1, 1);
 180         for (i = 0; i < 64; i++) {
 181             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 182         }
 183     } else
 184         put_bits(pb, 1, 0);
 185 }
 186
 187 /**
 188  * init s->current_picture.qscale_table from s->lambda_table
 189  */
 190 void ff_init_qscale_tab(MpegEncContext *s)
 191 {
 192     int8_t * const qscale_table = s->current_picture.qscale_table;
 193     int i;
 194
 195     for (i = 0; i < s->mb_num; i++) {
 196         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 197         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 198         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 199                                                   s->avctx->qmax);
 200     }
 201 }
 202
 203 static void update_duplicate_context_after_me(MpegEncContext *dst,
 204                                               MpegEncContext *src)
 205 {
 206 #define COPY(a) dst->a= src->a
 207     COPY(pict_type);
 208     COPY(current_picture);
 209     COPY(f_code);
 210     COPY(b_code);
 211     COPY(qscale);
 212     COPY(lambda);
 213     COPY(lambda2);
 214     COPY(picture_in_gop_number);
 215     COPY(gop_picture_number);
 216     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 217     COPY(progressive_frame);    // FIXME don't set in encode_header
 218     COPY(partitioned_frame);    // FIXME don't set in encode_header
 219 #undef COPY
 220 }
 221
 222 /**
 223  * Set the given MpegEncContext to defaults for encoding.
 224  * the changed fields will not depend upon the prior state of the MpegEncContext.
 225  */
 226 static void mpv_encode_defaults(MpegEncContext *s)
 227 {
 228     int i;
 229     ff_mpv_common_defaults(s);
 230
 231     for (i = -16; i < 16; i++) {
 232         default_fcode_tab[i + MAX_MV] = 1;
 233     }
 234     s->me.mv_penalty = default_mv_penalty;
 235     s->fcode_tab     = default_fcode_tab;
 236
 237     s->input_picture_number  = 0;
 238     s->picture_in_gop_number = 0;
 239 }
 240
 241 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 242     if (ARCH_X86)
 243         ff_dct_encode_init_x86(s);
 244
 245     if (CONFIG_H263_ENCODER)
 246         ff_h263dsp_init(&s->h263dsp);
 247     if (!s->dct_quantize)
 248         s->dct_quantize = ff_dct_quantize_c;
 249     if (!s->denoise_dct)
 250         s->denoise_dct  = denoise_dct_c;
 251     s->fast_dct_quantize = s->dct_quantize;
 252     if (s->avctx->trellis)
 253         s->dct_quantize  = dct_quantize_trellis_c;
 254
 255     return 0;
 256 }
 257
 258 /* init video encoder */
 259 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 260 {
 261     MpegEncContext *s = avctx->priv_data;
 262     int i, ret, format_supported;
 263
 264     mpv_encode_defaults(s);
 265
 266     switch (avctx->codec_id) {
 267     case AV_CODEC_ID_MPEG2VIDEO:
 268         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 269             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 270             av_log(avctx, AV_LOG_ERROR,
 271                    "only YUV420 and YUV422 are supported\n");
 272             return -1;
 273         }
 274         break;
 275     case AV_CODEC_ID_MJPEG:
 276     case AV_CODEC_ID_AMV:
 277         format_supported = 0;
 278         /* JPEG color space */
 279         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 280             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 281             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 282             (avctx->color_range == AVCOL_RANGE_JPEG &&
 283              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 284               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 285               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 286             format_supported = 1;
 287         /* MPEG color space */
 288         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 289                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 290                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 291                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 292             format_supported = 1;
 293
 294         if (!format_supported) {
 295             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 296             return -1;
 297         }
 298         break;
 299     default:
 300         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 301             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 302             return -1;
 303         }
 304     }
 305
 306     switch (avctx->pix_fmt) {
 307     case AV_PIX_FMT_YUVJ444P:
 308     case AV_PIX_FMT_YUV444P:
 309         s->chroma_format = CHROMA_444;
 310         break;
 311     case AV_PIX_FMT_YUVJ422P:
 312     case AV_PIX_FMT_YUV422P:
 313         s->chroma_format = CHROMA_422;
 314         break;
 315     case AV_PIX_FMT_YUVJ420P:
 316     case AV_PIX_FMT_YUV420P:
 317     default:
 318         s->chroma_format = CHROMA_420;
 319         break;
 320     }
 321
 322     s->bit_rate = avctx->bit_rate;
 323     s->width    = avctx->width;
 324     s->height   = avctx->height;
 325     if (avctx->gop_size > 600 &&
 326         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 327         av_log(avctx, AV_LOG_WARNING,
 328                "keyframe interval too large!, reducing it from %d to %d\n",
 329                avctx->gop_size, 600);
 330         avctx->gop_size = 600;
 331     }
 332     s->gop_size     = avctx->gop_size;
 333     s->avctx        = avctx;
 334     if (avctx->max_b_frames > MAX_B_FRAMES) {
 335         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 336                "is %d.\n", MAX_B_FRAMES);
 337         avctx->max_b_frames = MAX_B_FRAMES;
 338     }
 339     s->max_b_frames = avctx->max_b_frames;
 340     s->codec_id     = avctx->codec->id;
 341     s->strict_std_compliance = avctx->strict_std_compliance;
 342     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 343     s->mpeg_quant         = avctx->mpeg_quant;
 344     s->rtp_mode           = !!avctx->rtp_payload_size;
 345     s->intra_dc_precision = avctx->intra_dc_precision;
 346
 347     // workaround some differences between how applications specify dc precision
 348     if (s->intra_dc_precision < 0) {
 349         s->intra_dc_precision += 8;
 350     } else if (s->intra_dc_precision >= 8)
 351         s->intra_dc_precision -= 8;
 352
 353     if (s->intra_dc_precision < 0) {
 354         av_log(avctx, AV_LOG_ERROR,
 355                 "intra dc precision must be positive, note some applications use"
 356                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
 357         return AVERROR(EINVAL);
 358     }
 359
 360     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
 361         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
 362         return AVERROR(EINVAL);
 363     }
 364     s->user_specified_pts = AV_NOPTS_VALUE;
 365
 366     if (s->gop_size <= 1) {
 367         s->intra_only = 1;
 368         s->gop_size   = 12;
 369     } else {
 370         s->intra_only = 0;
 371     }
 372
 373     s->me_method = avctx->me_method;
 374
 375     /* Fixed QSCALE */
 376     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 377
 378 #if FF_API_MPV_OPT
 379     FF_DISABLE_DEPRECATION_WARNINGS
 380     if (avctx->border_masking != 0.0)
 381         s->border_masking = avctx->border_masking;
 382     FF_ENABLE_DEPRECATION_WARNINGS
 383 #endif
 384
 385     s->adaptive_quant = (s->avctx->lumi_masking ||
 386                          s->avctx->dark_masking ||
 387                          s->avctx->temporal_cplx_masking ||
 388                          s->avctx->spatial_cplx_masking  ||
 389                          s->avctx->p_masking      ||
 390                          s->border_masking ||
 391                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 392                         !s->fixed_qscale;
 393
 394     s->loop_filter = !!(s->avctx->flags & CODEC_FLAG_LOOP_FILTER);
 395
 396     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 397         switch(avctx->codec_id) {
 398         case AV_CODEC_ID_MPEG1VIDEO:
 399         case AV_CODEC_ID_MPEG2VIDEO:
 400             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
 401             break;
 402         case AV_CODEC_ID_MPEG4:
 403         case AV_CODEC_ID_MSMPEG4V1:
 404         case AV_CODEC_ID_MSMPEG4V2:
 405         case AV_CODEC_ID_MSMPEG4V3:
 406             if       (avctx->rc_max_rate >= 15000000) {
 407                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
 408             } else if(avctx->rc_max_rate >=  2000000) {
 409                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
 410             } else if(avctx->rc_max_rate >=   384000) {
 411                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
 412             } else
 413                 avctx->rc_buffer_size = 40;
 414             avctx->rc_buffer_size *= 16384;
 415             break;
 416         }
 417         if (avctx->rc_buffer_size) {
 418             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 419         }
 420     }
 421
 422     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 423         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 424         return -1;
 425     }
 426
 427     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 428         av_log(avctx, AV_LOG_INFO,
 429                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 430     }
 431
 432     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 433         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 434         return -1;
 435     }
 436
 437     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 438         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 439         return -1;
 440     }
 441
 442     if (avctx->rc_max_rate &&
 443         avctx->rc_max_rate == avctx->bit_rate &&
 444         avctx->rc_max_rate != avctx->rc_min_rate) {
 445         av_log(avctx, AV_LOG_INFO,
 446                "impossible bitrate constraints, this will fail\n");
 447     }
 448
 449     if (avctx->rc_buffer_size &&
 450         avctx->bit_rate * (int64_t)avctx->time_base.num >
 451             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 452         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 453         return -1;
 454     }
 455
 456     if (!s->fixed_qscale &&
 457         avctx->bit_rate * av_q2d(avctx->time_base) >
 458             avctx->bit_rate_tolerance) {
 459         av_log(avctx, AV_LOG_WARNING,
 460                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 461         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 462     }
 463
 464     if (s->avctx->rc_max_rate &&
 465         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 466         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 467          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 468         90000LL * (avctx->rc_buffer_size - 1) >
 469             s->avctx->rc_max_rate * 0xFFFFLL) {
 470         av_log(avctx, AV_LOG_INFO,
 471                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 472                "specified vbv buffer is too large for the given bitrate!\n");
 473     }
 474
 475     if ((s->avctx->flags & CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
 476         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 477         s->codec_id != AV_CODEC_ID_FLV1) {
 478         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 479         return -1;
 480     }
 481
 482     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 483         av_log(avctx, AV_LOG_ERROR,
 484                "OBMC is only supported with simple mb decision\n");
 485         return -1;
 486     }
 487
 488     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 489         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 490         return -1;
 491     }
 492
 493     if (s->max_b_frames                    &&
 494         s->codec_id != AV_CODEC_ID_MPEG4      &&
 495         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 496         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 497         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 498         return -1;
 499     }
 500     if (s->max_b_frames < 0) {
 501         av_log(avctx, AV_LOG_ERROR,
 502                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 503         return -1;
 504     }
 505
 506     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 507          s->codec_id == AV_CODEC_ID_H263  ||
 508          s->codec_id == AV_CODEC_ID_H263P) &&
 509         (avctx->sample_aspect_ratio.num > 255 ||
 510          avctx->sample_aspect_ratio.den > 255)) {
 511         av_log(avctx, AV_LOG_WARNING,
 512                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 513                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 514         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 515                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 516     }
 517
 518     if ((s->codec_id == AV_CODEC_ID_H263  ||
 519          s->codec_id == AV_CODEC_ID_H263P) &&
 520         (avctx->width  > 2048 ||
 521          avctx->height > 1152 )) {
 522         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 523         return -1;
 524     }
 525     if ((s->codec_id == AV_CODEC_ID_H263  ||
 526          s->codec_id == AV_CODEC_ID_H263P) &&
 527         ((avctx->width &3) ||
 528          (avctx->height&3) )) {
 529         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 530         return -1;
 531     }
 532
 533     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 534         (avctx->width  > 4095 ||
 535          avctx->height > 4095 )) {
 536         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 537         return -1;
 538     }
 539
 540     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 541         (avctx->width  > 16383 ||
 542          avctx->height > 16383 )) {
 543         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 544         return -1;
 545     }
 546
 547     if (s->codec_id == AV_CODEC_ID_RV10 &&
 548         (avctx->width &15 ||
 549          avctx->height&15 )) {
 550         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 551         return AVERROR(EINVAL);
 552     }
 553
 554     if (s->codec_id == AV_CODEC_ID_RV20 &&
 555         (avctx->width &3 ||
 556          avctx->height&3 )) {
 557         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 558         return AVERROR(EINVAL);
 559     }
 560
 561     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 562          s->codec_id == AV_CODEC_ID_WMV2) &&
 563          avctx->width & 1) {
 564          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 565          return -1;
 566     }
 567
 568     if ((s->avctx->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 569         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 570         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 571         return -1;
 572     }
 573
 574     // FIXME mpeg2 uses that too
 575     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 576                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 577         av_log(avctx, AV_LOG_ERROR,
 578                "mpeg2 style quantization not supported by codec\n");
 579         return -1;
 580     }
 581
 582     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 583         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 584         return -1;
 585     }
 586
 587     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 588         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 589         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 590         return -1;
 591     }
 592
 593     if (s->avctx->scenechange_threshold < 1000000000 &&
 594         (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)) {
 595         av_log(avctx, AV_LOG_ERROR,
 596                "closed gop with scene change detection are not supported yet, "
 597                "set threshold to 1000000000\n");
 598         return -1;
 599     }
 600
 601     if (s->avctx->flags & CODEC_FLAG_LOW_DELAY) {
 602         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 603             av_log(avctx, AV_LOG_ERROR,
 604                   "low delay forcing is only available for mpeg2\n");
 605             return -1;
 606         }
 607         if (s->max_b_frames != 0) {
 608             av_log(avctx, AV_LOG_ERROR,
 609                    "b frames cannot be used with low delay\n");
 610             return -1;
 611         }
 612     }
 613
 614     if (s->q_scale_type == 1) {
 615         if (avctx->qmax > 12) {
 616             av_log(avctx, AV_LOG_ERROR,
 617                    "non linear quant only supports qmax <= 12 currently\n");
 618             return -1;
 619         }
 620     }
 621
 622     if (s->avctx->thread_count > 1         &&
 623         s->codec_id != AV_CODEC_ID_MPEG4      &&
 624         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 625         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 626         s->codec_id != AV_CODEC_ID_MJPEG      &&
 627         (s->codec_id != AV_CODEC_ID_H263P)) {
 628         av_log(avctx, AV_LOG_ERROR,
 629                "multi threaded encoding not supported by codec\n");
 630         return -1;
 631     }
 632
 633     if (s->avctx->thread_count < 1) {
 634         av_log(avctx, AV_LOG_ERROR,
 635                "automatic thread number detection not supported by codec, "
 636                "patch welcome\n");
 637         return -1;
 638     }
 639
 640     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 641         s->rtp_mode = 1;
 642
 643     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 644         s->h263_slice_structured = 1;
 645
 646     if (!avctx->time_base.den || !avctx->time_base.num) {
 647         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 648         return -1;
 649     }
 650
 651     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 652         av_log(avctx, AV_LOG_INFO,
 653                "notice: b_frame_strategy only affects the first pass\n");
 654         avctx->b_frame_strategy = 0;
 655     }
 656
 657     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 658     if (i > 1) {
 659         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 660         avctx->time_base.den /= i;
 661         avctx->time_base.num /= i;
 662         //return -1;
 663     }
 664
 665     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 666         // (a + x * 3 / 8) / x
 667         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 668         s->inter_quant_bias = 0;
 669     } else {
 670         s->intra_quant_bias = 0;
 671         // (a - x / 4) / x
 672         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 673     }
 674
 675     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 676         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 677         return AVERROR(EINVAL);
 678     }
 679
 680     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 681         s->intra_quant_bias = avctx->intra_quant_bias;
 682     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 683         s->inter_quant_bias = avctx->inter_quant_bias;
 684
 685     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 686
 687     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 688         s->avctx->time_base.den > (1 << 16) - 1) {
 689         av_log(avctx, AV_LOG_ERROR,
 690                "timebase %d/%d not supported by MPEG 4 standard, "
 691                "the maximum admitted value for the timebase denominator "
 692                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 693                (1 << 16) - 1);
 694         return -1;
 695     }
 696     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 697
 698     switch (avctx->codec->id) {
 699     case AV_CODEC_ID_MPEG1VIDEO:
 700         s->out_format = FMT_MPEG1;
 701         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
 702         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 703         break;
 704     case AV_CODEC_ID_MPEG2VIDEO:
 705         s->out_format = FMT_MPEG1;
 706         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
 707         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 708         s->rtp_mode   = 1;
 709         break;
 710     case AV_CODEC_ID_MJPEG:
 711     case AV_CODEC_ID_AMV:
 712         s->out_format = FMT_MJPEG;
 713         s->intra_only = 1; /* force intra only for jpeg */
 714         if (!CONFIG_MJPEG_ENCODER ||
 715             ff_mjpeg_encode_init(s) < 0)
 716             return -1;
 717         avctx->delay = 0;
 718         s->low_delay = 1;
 719         break;
 720     case AV_CODEC_ID_H261:
 721         if (!CONFIG_H261_ENCODER)
 722             return -1;
 723         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 724             av_log(avctx, AV_LOG_ERROR,
 725                    "The specified picture size of %dx%d is not valid for the "
 726                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 727                     s->width, s->height);
 728             return -1;
 729         }
 730         s->out_format = FMT_H261;
 731         avctx->delay  = 0;
 732         s->low_delay  = 1;
 733         s->rtp_mode   = 0; /* Sliced encoding not supported */
 734         break;
 735     case AV_CODEC_ID_H263:
 736         if (!CONFIG_H263_ENCODER)
 737             return -1;
 738         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 739                              s->width, s->height) == 8) {
 740             av_log(avctx, AV_LOG_ERROR,
 741                    "The specified picture size of %dx%d is not valid for "
 742                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 743                    "352x288, 704x576, and 1408x1152. "
 744                    "Try H.263+.\n", s->width, s->height);
 745             return -1;
 746         }
 747         s->out_format = FMT_H263;
 748         avctx->delay  = 0;
 749         s->low_delay  = 1;
 750         break;
 751     case AV_CODEC_ID_H263P:
 752         s->out_format = FMT_H263;
 753         s->h263_plus  = 1;
 754         /* Fx */
 755         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 756         s->modified_quant  = s->h263_aic;
 757         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 758         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 759
 760         /* /Fx */
 761         /* These are just to be sure */
 762         avctx->delay = 0;
 763         s->low_delay = 1;
 764         break;
 765     case AV_CODEC_ID_FLV1:
 766         s->out_format      = FMT_H263;
 767         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 768         s->unrestricted_mv = 1;
 769         s->rtp_mode  = 0; /* don't allow GOB */
 770         avctx->delay = 0;
 771         s->low_delay = 1;
 772         break;
 773     case AV_CODEC_ID_RV10:
 774         s->out_format = FMT_H263;
 775         avctx->delay  = 0;
 776         s->low_delay  = 1;
 777         break;
 778     case AV_CODEC_ID_RV20:
 779         s->out_format      = FMT_H263;
 780         avctx->delay       = 0;
 781         s->low_delay       = 1;
 782         s->modified_quant  = 1;
 783         s->h263_aic        = 1;
 784         s->h263_plus       = 1;
 785         s->loop_filter     = 1;
 786         s->unrestricted_mv = 0;
 787         break;
 788     case AV_CODEC_ID_MPEG4:
 789         s->out_format      = FMT_H263;
 790         s->h263_pred       = 1;
 791         s->unrestricted_mv = 1;
 792         s->low_delay       = s->max_b_frames ? 0 : 1;
 793         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 794         break;
 795     case AV_CODEC_ID_MSMPEG4V2:
 796         s->out_format      = FMT_H263;
 797         s->h263_pred       = 1;
 798         s->unrestricted_mv = 1;
 799         s->msmpeg4_version = 2;
 800         avctx->delay       = 0;
 801         s->low_delay       = 1;
 802         break;
 803     case AV_CODEC_ID_MSMPEG4V3:
 804         s->out_format        = FMT_H263;
 805         s->h263_pred         = 1;
 806         s->unrestricted_mv   = 1;
 807         s->msmpeg4_version   = 3;
 808         s->flipflop_rounding = 1;
 809         avctx->delay         = 0;
 810         s->low_delay         = 1;
 811         break;
 812     case AV_CODEC_ID_WMV1:
 813         s->out_format        = FMT_H263;
 814         s->h263_pred         = 1;
 815         s->unrestricted_mv   = 1;
 816         s->msmpeg4_version   = 4;
 817         s->flipflop_rounding = 1;
 818         avctx->delay         = 0;
 819         s->low_delay         = 1;
 820         break;
 821     case AV_CODEC_ID_WMV2:
 822         s->out_format        = FMT_H263;
 823         s->h263_pred         = 1;
 824         s->unrestricted_mv   = 1;
 825         s->msmpeg4_version   = 5;
 826         s->flipflop_rounding = 1;
 827         avctx->delay         = 0;
 828         s->low_delay         = 1;
 829         break;
 830     default:
 831         return -1;
 832     }
 833
 834     avctx->has_b_frames = !s->low_delay;
 835
 836     s->encoding = 1;
 837
 838     s->progressive_frame    =
 839     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 840                                                 CODEC_FLAG_INTERLACED_ME) ||
 841                                 s->alternate_scan);
 842
 843     /* init */
 844     ff_mpv_idct_init(s);
 845     if (ff_mpv_common_init(s) < 0)
 846         return -1;
 847
 848     ff_fdctdsp_init(&s->fdsp, avctx);
 849     ff_me_cmp_init(&s->mecc, avctx);
 850     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 851     ff_pixblockdsp_init(&s->pdsp, avctx);
 852     ff_qpeldsp_init(&s->qdsp);
 853
 854     s->avctx->coded_frame = s->current_picture.f;
 855
 856     if (s->msmpeg4_version) {
 857         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 858                           2 * 2 * (MAX_LEVEL + 1) *
 859                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 860     }
 861     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 862
 863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 864     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 865     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 866     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 867     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 868     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 869     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 870                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 871     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 872                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 873
 874     if (s->avctx->noise_reduction) {
 875         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 876                           2 * 64 * sizeof(uint16_t), fail);
 877     }
 878
 879     ff_dct_encode_init(s);
 880
 881     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 882         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 883
 884     s->quant_precision = 5;
 885
 886     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 887     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 888
 889     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 890         ff_h261_encode_init(s);
 891     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 892         ff_h263_encode_init(s);
 893     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 894         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 895             return ret;
 896     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 897         && s->out_format == FMT_MPEG1)
 898         ff_mpeg1_encode_init(s);
 899
 900     /* init q matrix */
 901     for (i = 0; i < 64; i++) {
 902         int j = s->idsp.idct_permutation[i];
 903         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 904             s->mpeg_quant) {
 905             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 906             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 907         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 908             s->intra_matrix[j] =
 909             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 910         } else {
 911             /* mpeg1/2 */
 912             s->chroma_intra_matrix[j] =
 913             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 914             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 915         }
 916         if (s->avctx->intra_matrix)
 917             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 918         if (s->avctx->inter_matrix)
 919             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 920     }
 921
 922     /* precompute matrix */
 923     /* for mjpeg, we do include qscale in the matrix */
 924     if (s->out_format != FMT_MJPEG) {
 925         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 926                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 927                           31, 1);
 928         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 929                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 930                           31, 0);
 931     }
 932
 933     if (ff_rate_control_init(s) < 0)
 934         return -1;
 935
 936 #if FF_API_ERROR_RATE
 937     FF_DISABLE_DEPRECATION_WARNINGS
 938     if (avctx->error_rate)
 939         s->error_rate = avctx->error_rate;
 940     FF_ENABLE_DEPRECATION_WARNINGS;
 941 #endif
 942
 943 #if FF_API_NORMALIZE_AQP
 944     FF_DISABLE_DEPRECATION_WARNINGS
 945     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 946         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 947     FF_ENABLE_DEPRECATION_WARNINGS;
 948 #endif
 949
 950 #if FF_API_MV0
 951     FF_DISABLE_DEPRECATION_WARNINGS
 952     if (avctx->flags & CODEC_FLAG_MV0)
 953         s->mpv_flags |= FF_MPV_FLAG_MV0;
 954     FF_ENABLE_DEPRECATION_WARNINGS
 955 #endif
 956
 957 #if FF_API_MPV_OPT
 958     FF_DISABLE_DEPRECATION_WARNINGS
 959     if (avctx->rc_qsquish != 0.0)
 960         s->rc_qsquish = avctx->rc_qsquish;
 961     if (avctx->rc_qmod_amp != 0.0)
 962         s->rc_qmod_amp = avctx->rc_qmod_amp;
 963     if (avctx->rc_qmod_freq)
 964         s->rc_qmod_freq = avctx->rc_qmod_freq;
 965     if (avctx->rc_buffer_aggressivity != 1.0)
 966         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 967     if (avctx->rc_initial_cplx != 0.0)
 968         s->rc_initial_cplx = avctx->rc_initial_cplx;
 969     if (avctx->lmin)
 970         s->lmin = avctx->lmin;
 971     if (avctx->lmax)
 972         s->lmax = avctx->lmax;
 973
 974     if (avctx->rc_eq) {
 975         av_freep(&s->rc_eq);
 976         s->rc_eq = av_strdup(avctx->rc_eq);
 977         if (!s->rc_eq)
 978             return AVERROR(ENOMEM);
 979     }
 980     FF_ENABLE_DEPRECATION_WARNINGS
 981 #endif
 982
 983     if (avctx->b_frame_strategy == 2) {
 984         for (i = 0; i < s->max_b_frames + 2; i++) {
 985             s->tmp_frames[i] = av_frame_alloc();
 986             if (!s->tmp_frames[i])
 987                 return AVERROR(ENOMEM);
 988
 989             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 990             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 991             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 992
 993             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 994             if (ret < 0)
 995                 return ret;
 996         }
 997     }
 998
 999     return 0;
1000 fail:
1001     ff_mpv_encode_end(avctx);
1002     return AVERROR_UNKNOWN;
1003 }
1004
1005 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1006 {
1007     MpegEncContext *s = avctx->priv_data;
1008     int i;
1009
1010     ff_rate_control_uninit(s);
1011
1012     ff_mpv_common_end(s);
1013     if (CONFIG_MJPEG_ENCODER &&
1014         s->out_format == FMT_MJPEG)
1015         ff_mjpeg_encode_close(s);
1016
1017     av_freep(&avctx->extradata);
1018
1019     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1020         av_frame_free(&s->tmp_frames[i]);
1021
1022     ff_free_picture_tables(&s->new_picture);
1023     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1024
1025     av_freep(&s->avctx->stats_out);
1026     av_freep(&s->ac_stats);
1027
1028     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1029     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1030     s->q_chroma_intra_matrix=   NULL;
1031     s->q_chroma_intra_matrix16= NULL;
1032     av_freep(&s->q_intra_matrix);
1033     av_freep(&s->q_inter_matrix);
1034     av_freep(&s->q_intra_matrix16);
1035     av_freep(&s->q_inter_matrix16);
1036     av_freep(&s->input_picture);
1037     av_freep(&s->reordered_input_picture);
1038     av_freep(&s->dct_offset);
1039
1040     return 0;
1041 }
1042
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the 16x16 block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
1056
1057 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1058                            uint8_t *ref, int stride)
1059 {
1060     int x, y, w, h;
1061     int acc = 0;
1062
1063     w = s->width  & ~15;
1064     h = s->height & ~15;
1065
1066     for (y = 0; y < h; y += 16) {
1067         for (x = 0; x < w; x += 16) {
1068             int offset = x + y * stride;
1069             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1070                                       stride, 16);
1071             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1072             int sae  = get_sae(src + offset, mean, stride);
1073
1074             acc += sae + 500 < sad;
1075         }
1076     }
1077     return acc;
1078 }
1079
1080 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1081 {
1082     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1083                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1084                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1085                             &s->linesize, &s->uvlinesize);
1086 }
1087
1088 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1089 {
1090     Picture *pic = NULL;
1091     int64_t pts;
1092     int i, display_picture_number = 0, ret;
1093     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1094                                                  (s->low_delay ? 0 : 1);
1095     int direct = 1;
1096
1097     if (pic_arg) {
1098         pts = pic_arg->pts;
1099         display_picture_number = s->input_picture_number++;
1100
1101         if (pts != AV_NOPTS_VALUE) {
1102             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1103                 int64_t last = s->user_specified_pts;
1104
1105                 if (pts <= last) {
1106                     av_log(s->avctx, AV_LOG_ERROR,
1107                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1108                            pts, last);
1109                     return AVERROR(EINVAL);
1110                 }
1111
1112                 if (!s->low_delay && display_picture_number == 1)
1113                     s->dts_delta = pts - last;
1114             }
1115             s->user_specified_pts = pts;
1116         } else {
1117             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1118                 s->user_specified_pts =
1119                 pts = s->user_specified_pts + 1;
1120                 av_log(s->avctx, AV_LOG_INFO,
1121                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1122                        pts);
1123             } else {
1124                 pts = display_picture_number;
1125             }
1126         }
1127     }
1128
1129     if (pic_arg) {
1130         if (!pic_arg->buf[0] ||
1131             pic_arg->linesize[0] != s->linesize ||
1132             pic_arg->linesize[1] != s->uvlinesize ||
1133             pic_arg->linesize[2] != s->uvlinesize)
1134             direct = 0;
1135         if ((s->width & 15) || (s->height & 15))
1136             direct = 0;
1137         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1138             direct = 0;
1139         if (s->linesize & (STRIDE_ALIGN-1))
1140             direct = 0;
1141
1142         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1143                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1144
1145         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1146         if (i < 0)
1147             return i;
1148
1149         pic = &s->picture[i];
1150         pic->reference = 3;
1151
1152         if (direct) {
1153             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1154                 return ret;
1155         }
1156         ret = alloc_picture(s, pic, direct);
1157         if (ret < 0)
1158             return ret;
1159
1160         if (!direct) {
1161             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1162                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1163                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1164                 // empty
1165             } else {
1166                 int h_chroma_shift, v_chroma_shift;
1167                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1168                                                  &h_chroma_shift,
1169                                                  &v_chroma_shift);
1170
1171                 for (i = 0; i < 3; i++) {
1172                     int src_stride = pic_arg->linesize[i];
1173                     int dst_stride = i ? s->uvlinesize : s->linesize;
1174                     int h_shift = i ? h_chroma_shift : 0;
1175                     int v_shift = i ? v_chroma_shift : 0;
1176                     int w = s->width  >> h_shift;
1177                     int h = s->height >> v_shift;
1178                     uint8_t *src = pic_arg->data[i];
1179                     uint8_t *dst = pic->f->data[i];
1180                     int vpad = 16;
1181
1182                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1183                         && !s->progressive_sequence
1184                         && FFALIGN(s->height, 32) - s->height > 16)
1185                         vpad = 32;
1186
1187                     if (!s->avctx->rc_buffer_size)
1188                         dst += INPLACE_OFFSET;
1189
1190                     if (src_stride == dst_stride)
1191                         memcpy(dst, src, src_stride * h);
1192                     else {
1193                         int h2 = h;
1194                         uint8_t *dst2 = dst;
1195                         while (h2--) {
1196                             memcpy(dst2, src, w);
1197                             dst2 += dst_stride;
1198                             src += src_stride;
1199                         }
1200                     }
1201                     if ((s->width & 15) || (s->height & (vpad-1))) {
1202                         s->mpvencdsp.draw_edges(dst, dst_stride,
1203                                                 w, h,
1204                                                 16 >> h_shift,
1205                                                 vpad >> v_shift,
1206                                                 EDGE_BOTTOM);
1207                     }
1208                 }
1209             }
1210         }
1211         ret = av_frame_copy_props(pic->f, pic_arg);
1212         if (ret < 0)
1213             return ret;
1214
1215         pic->f->display_picture_number = display_picture_number;
1216         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1217     }
1218
1219     /* shift buffer entries */
1220     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1221         s->input_picture[i - 1] = s->input_picture[i];
1222
1223     s->input_picture[encoding_delay] = (Picture*) pic;
1224
1225     return 0;
1226 }
1227
1228 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1229 {
1230     int x, y, plane;
1231     int score = 0;
1232     int64_t score64 = 0;
1233
1234     for (plane = 0; plane < 3; plane++) {
1235         const int stride = p->f->linesize[plane];
1236         const int bw = plane ? 1 : 2;
1237         for (y = 0; y < s->mb_height * bw; y++) {
1238             for (x = 0; x < s->mb_width * bw; x++) {
1239                 int off = p->shared ? 0 : 16;
1240                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1241                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1242                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1243
1244                 switch (FFABS(s->avctx->frame_skip_exp)) {
1245                 case 0: score    =  FFMAX(score, v);          break;
1246                 case 1: score   += FFABS(v);                  break;
1247                 case 2: score64 += v * (int64_t)v;                       break;
1248                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1249                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1250                 }
1251             }
1252         }
1253     }
1254     emms_c();
1255
1256     if (score)
1257         score64 = score;
1258     if (s->avctx->frame_skip_exp < 0)
1259         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1260                       -1.0/s->avctx->frame_skip_exp);
1261
1262     if (score64 < s->avctx->frame_skip_threshold)
1263         return 1;
1264     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1265         return 1;
1266     return 0;
1267 }
1268
1269 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1270 {
1271     AVPacket pkt = { 0 };
1272     int ret, got_output;
1273
1274     av_init_packet(&pkt);
1275     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1276     if (ret < 0)
1277         return ret;
1278
1279     ret = pkt.size;
1280     av_free_packet(&pkt);
1281     return ret;
1282 }
1283
1284 static int estimate_best_b_count(MpegEncContext *s)
1285 {
1286     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1287     AVCodecContext *c = avcodec_alloc_context3(NULL);
1288     const int scale = s->avctx->brd_scale;
1289     int i, j, out_size, p_lambda, b_lambda, lambda2;
1290     int64_t best_rd  = INT64_MAX;
1291     int best_b_count = -1;
1292
1293     if (!c)
1294         return AVERROR(ENOMEM);
1295     av_assert0(scale >= 0 && scale <= 3);
1296
1297     //emms_c();
1298     //s->next_picture_ptr->quality;
1299     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1300     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1301     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1302     if (!b_lambda) // FIXME we should do this somewhere else
1303         b_lambda = p_lambda;
1304     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1305                FF_LAMBDA_SHIFT;
1306
1307     c->width        = s->width  >> scale;
1308     c->height       = s->height >> scale;
1309     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1310     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1311     c->mb_decision  = s->avctx->mb_decision;
1312     c->me_cmp       = s->avctx->me_cmp;
1313     c->mb_cmp       = s->avctx->mb_cmp;
1314     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1315     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1316     c->time_base    = s->avctx->time_base;
1317     c->max_b_frames = s->max_b_frames;
1318
1319     if (avcodec_open2(c, codec, NULL) < 0)
1320         return -1;
1321
1322     for (i = 0; i < s->max_b_frames + 2; i++) {
1323         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1324                                                 s->next_picture_ptr;
1325         uint8_t *data[4];
1326
1327         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1328             pre_input = *pre_input_ptr;
1329             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1330
1331             if (!pre_input.shared && i) {
1332                 data[0] += INPLACE_OFFSET;
1333                 data[1] += INPLACE_OFFSET;
1334                 data[2] += INPLACE_OFFSET;
1335             }
1336
1337             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1338                                        s->tmp_frames[i]->linesize[0],
1339                                        data[0],
1340                                        pre_input.f->linesize[0],
1341                                        c->width, c->height);
1342             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1343                                        s->tmp_frames[i]->linesize[1],
1344                                        data[1],
1345                                        pre_input.f->linesize[1],
1346                                        c->width >> 1, c->height >> 1);
1347             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1348                                        s->tmp_frames[i]->linesize[2],
1349                                        data[2],
1350                                        pre_input.f->linesize[2],
1351                                        c->width >> 1, c->height >> 1);
1352         }
1353     }
1354
1355     for (j = 0; j < s->max_b_frames + 1; j++) {
1356         int64_t rd = 0;
1357
1358         if (!s->input_picture[j])
1359             break;
1360
1361         c->error[0] = c->error[1] = c->error[2] = 0;
1362
1363         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1364         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1365
1366         out_size = encode_frame(c, s->tmp_frames[0]);
1367
1368         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1369
1370         for (i = 0; i < s->max_b_frames + 1; i++) {
1371             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1372
1373             s->tmp_frames[i + 1]->pict_type = is_p ?
1374                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1375             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1376
1377             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1378
1379             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1380         }
1381
1382         /* get the delayed frames */
1383         while (out_size) {
1384             out_size = encode_frame(c, NULL);
1385             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1386         }
1387
1388         rd += c->error[0] + c->error[1] + c->error[2];
1389
1390         if (rd < best_rd) {
1391             best_rd = rd;
1392             best_b_count = j;
1393         }
1394     }
1395
1396     avcodec_close(c);
1397     av_freep(&c);
1398
1399     return best_b_count;
1400 }
1401
1402 static int select_input_picture(MpegEncContext *s)
1403 {
1404     int i, ret;
1405
1406     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1407         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1408     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1409
1410     /* set next picture type & ordering */
1411     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1412         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1413             if (s->picture_in_gop_number < s->gop_size &&
1414                 s->next_picture_ptr &&
1415                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1416                 // FIXME check that te gop check above is +-1 correct
1417                 av_frame_unref(s->input_picture[0]->f);
1418
1419                 ff_vbv_update(s, 0);
1420
1421                 goto no_output_pic;
1422             }
1423         }
1424
1425         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1426             !s->next_picture_ptr || s->intra_only) {
1427             s->reordered_input_picture[0] = s->input_picture[0];
1428             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1429             s->reordered_input_picture[0]->f->coded_picture_number =
1430                 s->coded_picture_number++;
1431         } else {
1432             int b_frames;
1433
1434             if (s->avctx->flags & CODEC_FLAG_PASS2) {
1435                 for (i = 0; i < s->max_b_frames + 1; i++) {
1436                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1437
1438                     if (pict_num >= s->rc_context.num_entries)
1439                         break;
1440                     if (!s->input_picture[i]) {
1441                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1442                         break;
1443                     }
1444
1445                     s->input_picture[i]->f->pict_type =
1446                         s->rc_context.entry[pict_num].new_pict_type;
1447                 }
1448             }
1449
1450             if (s->avctx->b_frame_strategy == 0) {
1451                 b_frames = s->max_b_frames;
1452                 while (b_frames && !s->input_picture[b_frames])
1453                     b_frames--;
1454             } else if (s->avctx->b_frame_strategy == 1) {
1455                 for (i = 1; i < s->max_b_frames + 1; i++) {
1456                     if (s->input_picture[i] &&
1457                         s->input_picture[i]->b_frame_score == 0) {
1458                         s->input_picture[i]->b_frame_score =
1459                             get_intra_count(s,
1460                                             s->input_picture[i    ]->f->data[0],
1461                                             s->input_picture[i - 1]->f->data[0],
1462                                             s->linesize) + 1;
1463                     }
1464                 }
1465                 for (i = 0; i < s->max_b_frames + 1; i++) {
1466                     if (!s->input_picture[i] ||
1467                         s->input_picture[i]->b_frame_score - 1 >
1468                             s->mb_num / s->avctx->b_sensitivity)
1469                         break;
1470                 }
1471
1472                 b_frames = FFMAX(0, i - 1);
1473
1474                 /* reset scores */
1475                 for (i = 0; i < b_frames + 1; i++) {
1476                     s->input_picture[i]->b_frame_score = 0;
1477                 }
1478             } else if (s->avctx->b_frame_strategy == 2) {
1479                 b_frames = estimate_best_b_count(s);
1480             } else {
1481                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1482                 b_frames = 0;
1483             }
1484
1485             emms_c();
1486
1487             for (i = b_frames - 1; i >= 0; i--) {
1488                 int type = s->input_picture[i]->f->pict_type;
1489                 if (type && type != AV_PICTURE_TYPE_B)
1490                     b_frames = i;
1491             }
1492             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1493                 b_frames == s->max_b_frames) {
1494                 av_log(s->avctx, AV_LOG_ERROR,
1495                        "warning, too many b frames in a row\n");
1496             }
1497
1498             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1499                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1500                     s->gop_size > s->picture_in_gop_number) {
1501                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1502                 } else {
1503                     if (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)
1504                         b_frames = 0;
1505                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1506                 }
1507             }
1508
1509             if ((s->avctx->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1510                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1511                 b_frames--;
1512
1513             s->reordered_input_picture[0] = s->input_picture[b_frames];
1514             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1515                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1516             s->reordered_input_picture[0]->f->coded_picture_number =
1517                 s->coded_picture_number++;
1518             for (i = 0; i < b_frames; i++) {
1519                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1520                 s->reordered_input_picture[i + 1]->f->pict_type =
1521                     AV_PICTURE_TYPE_B;
1522                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1523                     s->coded_picture_number++;
1524             }
1525         }
1526     }
1527 no_output_pic:
1528     if (s->reordered_input_picture[0]) {
1529         s->reordered_input_picture[0]->reference =
1530            s->reordered_input_picture[0]->f->pict_type !=
1531                AV_PICTURE_TYPE_B ? 3 : 0;
1532
1533         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1534         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1535             return ret;
1536
1537         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1538             // input is a shared pix, so we can't modifiy it -> alloc a new
1539             // one & ensure that the shared one is reuseable
1540
1541             Picture *pic;
1542             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1543             if (i < 0)
1544                 return i;
1545             pic = &s->picture[i];
1546
1547             pic->reference = s->reordered_input_picture[0]->reference;
1548             if (alloc_picture(s, pic, 0) < 0) {
1549                 return -1;
1550             }
1551
1552             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1553             if (ret < 0)
1554                 return ret;
1555
1556             /* mark us unused / free shared pic */
1557             av_frame_unref(s->reordered_input_picture[0]->f);
1558             s->reordered_input_picture[0]->shared = 0;
1559
1560             s->current_picture_ptr = pic;
1561         } else {
1562             // input is not a shared pix -> reuse buffer for current_pix
1563             s->current_picture_ptr = s->reordered_input_picture[0];
1564             for (i = 0; i < 4; i++) {
1565                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1566             }
1567         }
1568         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1569         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1570                                        s->current_picture_ptr)) < 0)
1571             return ret;
1572
1573         s->picture_number = s->new_picture.f->display_picture_number;
1574     } else {
1575         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1576     }
1577     return 0;
1578 }
1579
1580 static void frame_end(MpegEncContext *s)
1581 {
1582     if (s->unrestricted_mv &&
1583         s->current_picture.reference &&
1584         !s->intra_only) {
1585         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1586         int hshift = desc->log2_chroma_w;
1587         int vshift = desc->log2_chroma_h;
1588         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1589                                 s->current_picture.f->linesize[0],
1590                                 s->h_edge_pos, s->v_edge_pos,
1591                                 EDGE_WIDTH, EDGE_WIDTH,
1592                                 EDGE_TOP | EDGE_BOTTOM);
1593         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1594                                 s->current_picture.f->linesize[1],
1595                                 s->h_edge_pos >> hshift,
1596                                 s->v_edge_pos >> vshift,
1597                                 EDGE_WIDTH >> hshift,
1598                                 EDGE_WIDTH >> vshift,
1599                                 EDGE_TOP | EDGE_BOTTOM);
1600         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1601                                 s->current_picture.f->linesize[2],
1602                                 s->h_edge_pos >> hshift,
1603                                 s->v_edge_pos >> vshift,
1604                                 EDGE_WIDTH >> hshift,
1605                                 EDGE_WIDTH >> vshift,
1606                                 EDGE_TOP | EDGE_BOTTOM);
1607     }
1608
1609     emms_c();
1610
1611     s->last_pict_type                 = s->pict_type;
1612     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1613     if (s->pict_type!= AV_PICTURE_TYPE_B)
1614         s->last_non_b_pict_type = s->pict_type;
1615
1616     s->avctx->coded_frame = s->current_picture_ptr->f;
1617
1618 }
1619
1620 static void update_noise_reduction(MpegEncContext *s)
1621 {
1622     int intra, i;
1623
1624     for (intra = 0; intra < 2; intra++) {
1625         if (s->dct_count[intra] > (1 << 16)) {
1626             for (i = 0; i < 64; i++) {
1627                 s->dct_error_sum[intra][i] >>= 1;
1628             }
1629             s->dct_count[intra] >>= 1;
1630         }
1631
1632         for (i = 0; i < 64; i++) {
1633             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1634                                        s->dct_count[intra] +
1635                                        s->dct_error_sum[intra][i] / 2) /
1636                                       (s->dct_error_sum[intra][i] + 1);
1637         }
1638     }
1639 }
1640
1641 static int frame_start(MpegEncContext *s)
1642 {
1643     int ret;
1644
1645     /* mark & release old frames */
1646     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1647         s->last_picture_ptr != s->next_picture_ptr &&
1648         s->last_picture_ptr->f->buf[0]) {
1649         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1650     }
1651
1652     s->current_picture_ptr->f->pict_type = s->pict_type;
1653     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1654
1655     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1656     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1657                                    s->current_picture_ptr)) < 0)
1658         return ret;
1659
1660     if (s->pict_type != AV_PICTURE_TYPE_B) {
1661         s->last_picture_ptr = s->next_picture_ptr;
1662         if (!s->droppable)
1663             s->next_picture_ptr = s->current_picture_ptr;
1664     }
1665
1666     if (s->last_picture_ptr) {
1667         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1668         if (s->last_picture_ptr->f->buf[0] &&
1669             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1670                                        s->last_picture_ptr)) < 0)
1671             return ret;
1672     }
1673     if (s->next_picture_ptr) {
1674         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1675         if (s->next_picture_ptr->f->buf[0] &&
1676             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1677                                        s->next_picture_ptr)) < 0)
1678             return ret;
1679     }
1680
1681     if (s->picture_structure!= PICT_FRAME) {
1682         int i;
1683         for (i = 0; i < 4; i++) {
1684             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1685                 s->current_picture.f->data[i] +=
1686                     s->current_picture.f->linesize[i];
1687             }
1688             s->current_picture.f->linesize[i] *= 2;
1689             s->last_picture.f->linesize[i]    *= 2;
1690             s->next_picture.f->linesize[i]    *= 2;
1691         }
1692     }
1693
1694     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1695         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1696         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1697     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1698         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1699         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1700     } else {
1701         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1702         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1703     }
1704
1705     if (s->dct_error_sum) {
1706         av_assert2(s->avctx->noise_reduction && s->encoding);
1707         update_noise_reduction(s);
1708     }
1709
1710     return 0;
1711 }
1712
1713 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1714                           const AVFrame *pic_arg, int *got_packet)
1715 {
1716     MpegEncContext *s = avctx->priv_data;
1717     int i, stuffing_count, ret;
1718     int context_count = s->slice_context_count;
1719
1720     s->picture_in_gop_number++;
1721
1722     if (load_input_picture(s, pic_arg) < 0)
1723         return -1;
1724
1725     if (select_input_picture(s) < 0) {
1726         return -1;
1727     }
1728
1729     /* output? */
1730     if (s->new_picture.f->data[0]) {
1731         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1732         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1733                                               :
1734                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1735         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1736             return ret;
1737         if (s->mb_info) {
1738             s->mb_info_ptr = av_packet_new_side_data(pkt,
1739                                  AV_PKT_DATA_H263_MB_INFO,
1740                                  s->mb_width*s->mb_height*12);
1741             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1742         }
1743
1744         for (i = 0; i < context_count; i++) {
1745             int start_y = s->thread_context[i]->start_mb_y;
1746             int   end_y = s->thread_context[i]->  end_mb_y;
1747             int h       = s->mb_height;
1748             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1749             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1750
1751             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1752         }
1753
1754         s->pict_type = s->new_picture.f->pict_type;
1755         //emms_c();
1756         ret = frame_start(s);
1757         if (ret < 0)
1758             return ret;
1759 vbv_retry:
1760         ret = encode_picture(s, s->picture_number);
1761         if (growing_buffer) {
1762             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1763             pkt->data = s->pb.buf;
1764             pkt->size = avctx->internal->byte_buffer_size;
1765         }
1766         if (ret < 0)
1767             return -1;
1768
1769         avctx->header_bits = s->header_bits;
1770         avctx->mv_bits     = s->mv_bits;
1771         avctx->misc_bits   = s->misc_bits;
1772         avctx->i_tex_bits  = s->i_tex_bits;
1773         avctx->p_tex_bits  = s->p_tex_bits;
1774         avctx->i_count     = s->i_count;
1775         // FIXME f/b_count in avctx
1776         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1777         avctx->skip_count  = s->skip_count;
1778
1779         frame_end(s);
1780
1781         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1782             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1783
1784         if (avctx->rc_buffer_size) {
1785             RateControlContext *rcc = &s->rc_context;
1786             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1787
1788             if (put_bits_count(&s->pb) > max_size &&
1789                 s->lambda < s->lmax) {
1790                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1791                                        (s->qscale + 1) / s->qscale);
1792                 if (s->adaptive_quant) {
1793                     int i;
1794                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1795                         s->lambda_table[i] =
1796                             FFMAX(s->lambda_table[i] + 1,
1797                                   s->lambda_table[i] * (s->qscale + 1) /
1798                                   s->qscale);
1799                 }
1800                 s->mb_skipped = 0;        // done in frame_start()
1801                 // done in encode_picture() so we must undo it
1802                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1803                     if (s->flipflop_rounding          ||
1804                         s->codec_id == AV_CODEC_ID_H263P ||
1805                         s->codec_id == AV_CODEC_ID_MPEG4)
1806                         s->no_rounding ^= 1;
1807                 }
1808                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1809                     s->time_base       = s->last_time_base;
1810                     s->last_non_b_time = s->time - s->pp_time;
1811                 }
1812                 for (i = 0; i < context_count; i++) {
1813                     PutBitContext *pb = &s->thread_context[i]->pb;
1814                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1815                 }
1816                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1817                 goto vbv_retry;
1818             }
1819
1820             av_assert0(s->avctx->rc_max_rate);
1821         }
1822
1823         if (s->avctx->flags & CODEC_FLAG_PASS1)
1824             ff_write_pass1_stats(s);
1825
1826         for (i = 0; i < 4; i++) {
1827             s->current_picture_ptr->f->error[i] =
1828             s->current_picture.f->error[i] =
1829                 s->current_picture.error[i];
1830             avctx->error[i] += s->current_picture_ptr->f->error[i];
1831         }
1832
1833         if (s->avctx->flags & CODEC_FLAG_PASS1)
1834             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1835                    avctx->i_tex_bits + avctx->p_tex_bits ==
1836                        put_bits_count(&s->pb));
1837         flush_put_bits(&s->pb);
1838         s->frame_bits  = put_bits_count(&s->pb);
1839
1840         stuffing_count = ff_vbv_update(s, s->frame_bits);
1841         s->stuffing_bits = 8*stuffing_count;
1842         if (stuffing_count) {
1843             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1844                     stuffing_count + 50) {
1845                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1846                 return -1;
1847             }
1848
1849             switch (s->codec_id) {
1850             case AV_CODEC_ID_MPEG1VIDEO:
1851             case AV_CODEC_ID_MPEG2VIDEO:
1852                 while (stuffing_count--) {
1853                     put_bits(&s->pb, 8, 0);
1854                 }
1855             break;
1856             case AV_CODEC_ID_MPEG4:
1857                 put_bits(&s->pb, 16, 0);
1858                 put_bits(&s->pb, 16, 0x1C3);
1859                 stuffing_count -= 4;
1860                 while (stuffing_count--) {
1861                     put_bits(&s->pb, 8, 0xFF);
1862                 }
1863             break;
1864             default:
1865                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1866             }
1867             flush_put_bits(&s->pb);
1868             s->frame_bits  = put_bits_count(&s->pb);
1869         }
1870
1871         /* update mpeg1/2 vbv_delay for CBR */
1872         if (s->avctx->rc_max_rate                          &&
1873             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1874             s->out_format == FMT_MPEG1                     &&
1875             90000LL * (avctx->rc_buffer_size - 1) <=
1876                 s->avctx->rc_max_rate * 0xFFFFLL) {
1877             int vbv_delay, min_delay;
1878             double inbits  = s->avctx->rc_max_rate *
1879                              av_q2d(s->avctx->time_base);
1880             int    minbits = s->frame_bits - 8 *
1881                              (s->vbv_delay_ptr - s->pb.buf - 1);
1882             double bits    = s->rc_context.buffer_index + minbits - inbits;
1883
1884             if (bits < 0)
1885                 av_log(s->avctx, AV_LOG_ERROR,
1886                        "Internal error, negative bits\n");
1887
1888             assert(s->repeat_first_field == 0);
1889
1890             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1891             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1892                         s->avctx->rc_max_rate;
1893
1894             vbv_delay = FFMAX(vbv_delay, min_delay);
1895
1896             av_assert0(vbv_delay < 0xFFFF);
1897
1898             s->vbv_delay_ptr[0] &= 0xF8;
1899             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1900             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1901             s->vbv_delay_ptr[2] &= 0x07;
1902             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1903             avctx->vbv_delay     = vbv_delay * 300;
1904         }
1905         s->total_bits     += s->frame_bits;
1906         avctx->frame_bits  = s->frame_bits;
1907
1908         pkt->pts = s->current_picture.f->pts;
1909         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1910             if (!s->current_picture.f->coded_picture_number)
1911                 pkt->dts = pkt->pts - s->dts_delta;
1912             else
1913                 pkt->dts = s->reordered_pts;
1914             s->reordered_pts = pkt->pts;
1915         } else
1916             pkt->dts = pkt->pts;
1917         if (s->current_picture.f->key_frame)
1918             pkt->flags |= AV_PKT_FLAG_KEY;
1919         if (s->mb_info)
1920             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1921     } else {
1922         s->frame_bits = 0;
1923     }
1924
1925     /* release non-reference frames */
1926     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1927         if (!s->picture[i].reference)
1928             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1929     }
1930
1931     av_assert1((s->frame_bits & 7) == 0);
1932
1933     pkt->size = s->frame_bits / 8;
1934     *got_packet = !!pkt->size;
1935     return 0;
1936 }
1937
1938 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1939                                                 int n, int threshold)
1940 {
1941     static const char tab[64] = {
1942         3, 2, 2, 1, 1, 1, 1, 1,
1943         1, 1, 1, 1, 1, 1, 1, 1,
1944         1, 1, 1, 1, 1, 1, 1, 1,
1945         0, 0, 0, 0, 0, 0, 0, 0,
1946         0, 0, 0, 0, 0, 0, 0, 0,
1947         0, 0, 0, 0, 0, 0, 0, 0,
1948         0, 0, 0, 0, 0, 0, 0, 0,
1949         0, 0, 0, 0, 0, 0, 0, 0
1950     };
1951     int score = 0;
1952     int run = 0;
1953     int i;
1954     int16_t *block = s->block[n];
1955     const int last_index = s->block_last_index[n];
1956     int skip_dc;
1957
1958     if (threshold < 0) {
1959         skip_dc = 0;
1960         threshold = -threshold;
1961     } else
1962         skip_dc = 1;
1963
1964     /* Are all we could set to zero already zero? */
1965     if (last_index <= skip_dc - 1)
1966         return;
1967
1968     for (i = 0; i <= last_index; i++) {
1969         const int j = s->intra_scantable.permutated[i];
1970         const int level = FFABS(block[j]);
1971         if (level == 1) {
1972             if (skip_dc && i == 0)
1973                 continue;
1974             score += tab[run];
1975             run = 0;
1976         } else if (level > 1) {
1977             return;
1978         } else {
1979             run++;
1980         }
1981     }
1982     if (score >= threshold)
1983         return;
1984     for (i = skip_dc; i <= last_index; i++) {
1985         const int j = s->intra_scantable.permutated[i];
1986         block[j] = 0;
1987     }
1988     if (block[0])
1989         s->block_last_index[n] = 0;
1990     else
1991         s->block_last_index[n] = -1;
1992 }
1993
1994 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1995                                int last_index)
1996 {
1997     int i;
1998     const int maxlevel = s->max_qcoeff;
1999     const int minlevel = s->min_qcoeff;
2000     int overflow = 0;
2001
2002     if (s->mb_intra) {
2003         i = 1; // skip clipping of intra dc
2004     } else
2005         i = 0;
2006
2007     for (; i <= last_index; i++) {
2008         const int j = s->intra_scantable.permutated[i];
2009         int level = block[j];
2010
2011         if (level > maxlevel) {
2012             level = maxlevel;
2013             overflow++;
2014         } else if (level < minlevel) {
2015             level = minlevel;
2016             overflow++;
2017         }
2018
2019         block[j] = level;
2020     }
2021
2022     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2023         av_log(s->avctx, AV_LOG_INFO,
2024                "warning, clipping %d dct coefficients to %d..%d\n",
2025                overflow, minlevel, maxlevel);
2026 }
2027
/**
 * Compute a weight for each pixel of an 8x8 block from its surroundings.
 *
 * For every pixel the up to 3x3 neighbourhood, clipped to the block, is
 * scanned; the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count
 * with count being the number of pixels in that neighbourhood.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int x, y;
    // FIXME optimize
    for (y = 0; y < 8; y++) {
        const int y_lo = FFMAX(y - 1, 0);
        const int y_hi = FFMIN(8, y + 2);

        for (x = 0; x < 8; x++) {
            const int x_lo = FFMAX(x - 1, 0);
            const int x_hi = FFMIN(8, x + 2);
            int count = 0;
            int sum   = 0;
            int sqr   = 0;
            int nx, ny;

            for (ny = y_lo; ny < y_hi; ny++) {
                const uint8_t *row = ptr + ny * stride;

                for (nx = x_lo; nx < x_hi; nx++) {
                    const int v = row[nx];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }
            weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2051
2052 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2053                                                 int motion_x, int motion_y,
2054                                                 int mb_block_height,
2055                                                 int mb_block_width,
2056                                                 int mb_block_count)
2057 {
2058     int16_t weight[12][64];
2059     int16_t orig[12][64];
2060     const int mb_x = s->mb_x;
2061     const int mb_y = s->mb_y;
2062     int i;
2063     int skip_dct[12];
2064     int dct_offset = s->linesize * 8; // default for progressive frames
2065     int uv_dct_offset = s->uvlinesize * 8;
2066     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2067     ptrdiff_t wrap_y, wrap_c;
2068
2069     for (i = 0; i < mb_block_count; i++)
2070         skip_dct[i] = s->skipdct;
2071
2072     if (s->adaptive_quant) {
2073         const int last_qp = s->qscale;
2074         const int mb_xy = mb_x + mb_y * s->mb_stride;
2075
2076         s->lambda = s->lambda_table[mb_xy];
2077         update_qscale(s);
2078
2079         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2080             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2081             s->dquant = s->qscale - last_qp;
2082
2083             if (s->out_format == FMT_H263) {
2084                 s->dquant = av_clip(s->dquant, -2, 2);
2085
2086                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2087                     if (!s->mb_intra) {
2088                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2089                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2090                                 s->dquant = 0;
2091                         }
2092                         if (s->mv_type == MV_TYPE_8X8)
2093                             s->dquant = 0;
2094                     }
2095                 }
2096             }
2097         }
2098         ff_set_qscale(s, last_qp + s->dquant);
2099     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2100         ff_set_qscale(s, s->qscale + s->dquant);
2101
2102     wrap_y = s->linesize;
2103     wrap_c = s->uvlinesize;
2104     ptr_y  = s->new_picture.f->data[0] +
2105              (mb_y * 16 * wrap_y)              + mb_x * 16;
2106     ptr_cb = s->new_picture.f->data[1] +
2107              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2108     ptr_cr = s->new_picture.f->data[2] +
2109              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2110
2111     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2112         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2113         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2114         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2115         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2116                                  wrap_y, wrap_y,
2117                                  16, 16, mb_x * 16, mb_y * 16,
2118                                  s->width, s->height);
2119         ptr_y = ebuf;
2120         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2121                                  wrap_c, wrap_c,
2122                                  mb_block_width, mb_block_height,
2123                                  mb_x * mb_block_width, mb_y * mb_block_height,
2124                                  cw, ch);
2125         ptr_cb = ebuf + 16 * wrap_y;
2126         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2127                                  wrap_c, wrap_c,
2128                                  mb_block_width, mb_block_height,
2129                                  mb_x * mb_block_width, mb_y * mb_block_height,
2130                                  cw, ch);
2131         ptr_cr = ebuf + 16 * wrap_y + 16;
2132     }
2133
2134     if (s->mb_intra) {
2135         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2136             int progressive_score, interlaced_score;
2137
2138             s->interlaced_dct = 0;
2139             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2140                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2141                                                      NULL, wrap_y, 8) - 400;
2142
2143             if (progressive_score > 0) {
2144                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2145                                                         NULL, wrap_y * 2, 8) +
2146                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2147                                                         NULL, wrap_y * 2, 8);
2148                 if (progressive_score > interlaced_score) {
2149                     s->interlaced_dct = 1;
2150
2151                     dct_offset = wrap_y;
2152                     uv_dct_offset = wrap_c;
2153                     wrap_y <<= 1;
2154                     if (s->chroma_format == CHROMA_422 ||
2155                         s->chroma_format == CHROMA_444)
2156                         wrap_c <<= 1;
2157                 }
2158             }
2159         }
2160
2161         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2162         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2163         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2164         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2165
2166         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2167             skip_dct[4] = 1;
2168             skip_dct[5] = 1;
2169         } else {
2170             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2171             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2172             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2173                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2174                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2175             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2176                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2177                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2178                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2179                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2180                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2181                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2182             }
2183         }
2184     } else {
2185         op_pixels_func (*op_pix)[4];
2186         qpel_mc_func (*op_qpix)[16];
2187         uint8_t *dest_y, *dest_cb, *dest_cr;
2188
2189         dest_y  = s->dest[0];
2190         dest_cb = s->dest[1];
2191         dest_cr = s->dest[2];
2192
2193         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2194             op_pix  = s->hdsp.put_pixels_tab;
2195             op_qpix = s->qdsp.put_qpel_pixels_tab;
2196         } else {
2197             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2198             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2199         }
2200
2201         if (s->mv_dir & MV_DIR_FORWARD) {
2202             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2203                           s->last_picture.f->data,
2204                           op_pix, op_qpix);
2205             op_pix  = s->hdsp.avg_pixels_tab;
2206             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2207         }
2208         if (s->mv_dir & MV_DIR_BACKWARD) {
2209             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2210                           s->next_picture.f->data,
2211                           op_pix, op_qpix);
2212         }
2213
2214         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2215             int progressive_score, interlaced_score;
2216
2217             s->interlaced_dct = 0;
2218             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2219                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2220                                                      ptr_y + wrap_y * 8,
2221                                                      wrap_y, 8) - 400;
2222
2223             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2224                 progressive_score -= 400;
2225
2226             if (progressive_score > 0) {
2227                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2228                                                         wrap_y * 2, 8) +
2229                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2230                                                         ptr_y + wrap_y,
2231                                                         wrap_y * 2, 8);
2232
2233                 if (progressive_score > interlaced_score) {
2234                     s->interlaced_dct = 1;
2235
2236                     dct_offset = wrap_y;
2237                     uv_dct_offset = wrap_c;
2238                     wrap_y <<= 1;
2239                     if (s->chroma_format == CHROMA_422)
2240                         wrap_c <<= 1;
2241                 }
2242             }
2243         }
2244
2245         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2246         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2247         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2248                             dest_y + dct_offset, wrap_y);
2249         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2250                             dest_y + dct_offset + 8, wrap_y);
2251
2252         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2253             skip_dct[4] = 1;
2254             skip_dct[5] = 1;
2255         } else {
2256             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2257             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2258             if (!s->chroma_y_shift) { /* 422 */
2259                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2260                                     dest_cb + uv_dct_offset, wrap_c);
2261                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2262                                     dest_cr + uv_dct_offset, wrap_c);
2263             }
2264         }
2265         /* pre quantization */
2266         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2267                 2 * s->qscale * s->qscale) {
2268             // FIXME optimize
2269             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2270                 skip_dct[0] = 1;
2271             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2272                 skip_dct[1] = 1;
2273             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2274                                wrap_y, 8) < 20 * s->qscale)
2275                 skip_dct[2] = 1;
2276             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2277                                wrap_y, 8) < 20 * s->qscale)
2278                 skip_dct[3] = 1;
2279             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2280                 skip_dct[4] = 1;
2281             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2282                 skip_dct[5] = 1;
2283             if (!s->chroma_y_shift) { /* 422 */
2284                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2285                                    dest_cb + uv_dct_offset,
2286                                    wrap_c, 8) < 20 * s->qscale)
2287                     skip_dct[6] = 1;
2288                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2289                                    dest_cr + uv_dct_offset,
2290                                    wrap_c, 8) < 20 * s->qscale)
2291                     skip_dct[7] = 1;
2292             }
2293         }
2294     }
2295
2296     if (s->quantizer_noise_shaping) {
2297         if (!skip_dct[0])
2298             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2299         if (!skip_dct[1])
2300             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2301         if (!skip_dct[2])
2302             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2303         if (!skip_dct[3])
2304             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2305         if (!skip_dct[4])
2306             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2307         if (!skip_dct[5])
2308             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2309         if (!s->chroma_y_shift) { /* 422 */
2310             if (!skip_dct[6])
2311                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2312                                   wrap_c);
2313             if (!skip_dct[7])
2314                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2315                                   wrap_c);
2316         }
2317         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2318     }
2319
2320     /* DCT & quantize */
2321     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2322     {
2323         for (i = 0; i < mb_block_count; i++) {
2324             if (!skip_dct[i]) {
2325                 int overflow;
2326                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2327                 // FIXME we could decide to change to quantizer instead of
2328                 // clipping
2329                 // JS: I don't think that would be a good idea it could lower
2330                 //     quality instead of improve it. Just INTRADC clipping
2331                 //     deserves changes in quantizer
2332                 if (overflow)
2333                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2334             } else
2335                 s->block_last_index[i] = -1;
2336         }
2337         if (s->quantizer_noise_shaping) {
2338             for (i = 0; i < mb_block_count; i++) {
2339                 if (!skip_dct[i]) {
2340                     s->block_last_index[i] =
2341                         dct_quantize_refine(s, s->block[i], weight[i],
2342                                             orig[i], i, s->qscale);
2343                 }
2344             }
2345         }
2346
2347         if (s->luma_elim_threshold && !s->mb_intra)
2348             for (i = 0; i < 4; i++)
2349                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2350         if (s->chroma_elim_threshold && !s->mb_intra)
2351             for (i = 4; i < mb_block_count; i++)
2352                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2353
2354         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2355             for (i = 0; i < mb_block_count; i++) {
2356                 if (s->block_last_index[i] == -1)
2357                     s->coded_score[i] = INT_MAX / 256;
2358             }
2359         }
2360     }
2361
2362     if ((s->avctx->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2363         s->block_last_index[4] =
2364         s->block_last_index[5] = 0;
2365         s->block[4][0] =
2366         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2367         if (!s->chroma_y_shift) { /* 422 / 444 */
2368             for (i=6; i<12; i++) {
2369                 s->block_last_index[i] = 0;
2370                 s->block[i][0] = s->block[4][0];
2371             }
2372         }
2373     }
2374
2375     // non c quantize code returns incorrect block_last_index FIXME
2376     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2377         for (i = 0; i < mb_block_count; i++) {
2378             int j;
2379             if (s->block_last_index[i] > 0) {
2380                 for (j = 63; j > 0; j--) {
2381                     if (s->block[i][s->intra_scantable.permutated[j]])
2382                         break;
2383                 }
2384                 s->block_last_index[i] = j;
2385             }
2386         }
2387     }
2388
2389     /* huffman encode */
2390     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2391     case AV_CODEC_ID_MPEG1VIDEO:
2392     case AV_CODEC_ID_MPEG2VIDEO:
2393         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2394             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2395         break;
2396     case AV_CODEC_ID_MPEG4:
2397         if (CONFIG_MPEG4_ENCODER)
2398             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2399         break;
2400     case AV_CODEC_ID_MSMPEG4V2:
2401     case AV_CODEC_ID_MSMPEG4V3:
2402     case AV_CODEC_ID_WMV1:
2403         if (CONFIG_MSMPEG4_ENCODER)
2404             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2405         break;
2406     case AV_CODEC_ID_WMV2:
2407         if (CONFIG_WMV2_ENCODER)
2408             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2409         break;
2410     case AV_CODEC_ID_H261:
2411         if (CONFIG_H261_ENCODER)
2412             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2413         break;
2414     case AV_CODEC_ID_H263:
2415     case AV_CODEC_ID_H263P:
2416     case AV_CODEC_ID_FLV1:
2417     case AV_CODEC_ID_RV10:
2418     case AV_CODEC_ID_RV20:
2419         if (CONFIG_H263_ENCODER)
2420             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2421         break;
2422     case AV_CODEC_ID_MJPEG:
2423     case AV_CODEC_ID_AMV:
2424         if (CONFIG_MJPEG_ENCODER)
2425             ff_mjpeg_encode_mb(s, s->block);
2426         break;
2427     default:
2428         av_assert1(0);
2429     }
2430 }
2431
2432 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2433 {
2434     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2435     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2436     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2437 }
2438
2439 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2440     int i;
2441
2442     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2443
2444     /* mpeg1 */
2445     d->mb_skip_run= s->mb_skip_run;
2446     for(i=0; i<3; i++)
2447         d->last_dc[i] = s->last_dc[i];
2448
2449     /* statistics */
2450     d->mv_bits= s->mv_bits;
2451     d->i_tex_bits= s->i_tex_bits;
2452     d->p_tex_bits= s->p_tex_bits;
2453     d->i_count= s->i_count;
2454     d->f_count= s->f_count;
2455     d->b_count= s->b_count;
2456     d->skip_count= s->skip_count;
2457     d->misc_bits= s->misc_bits;
2458     d->last_bits= 0;
2459
2460     d->mb_skipped= 0;
2461     d->qscale= s->qscale;
2462     d->dquant= s->dquant;
2463
2464     d->esc3_level_length= s->esc3_level_length;
2465 }
2466
2467 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2468     int i;
2469
2470     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2471     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2472
2473     /* mpeg1 */
2474     d->mb_skip_run= s->mb_skip_run;
2475     for(i=0; i<3; i++)
2476         d->last_dc[i] = s->last_dc[i];
2477
2478     /* statistics */
2479     d->mv_bits= s->mv_bits;
2480     d->i_tex_bits= s->i_tex_bits;
2481     d->p_tex_bits= s->p_tex_bits;
2482     d->i_count= s->i_count;
2483     d->f_count= s->f_count;
2484     d->b_count= s->b_count;
2485     d->skip_count= s->skip_count;
2486     d->misc_bits= s->misc_bits;
2487
2488     d->mb_intra= s->mb_intra;
2489     d->mb_skipped= s->mb_skipped;
2490     d->mv_type= s->mv_type;
2491     d->mv_dir= s->mv_dir;
2492     d->pb= s->pb;
2493     if(s->data_partitioning){
2494         d->pb2= s->pb2;
2495         d->tex_pb= s->tex_pb;
2496     }
2497     d->block= s->block;
2498     for(i=0; i<8; i++)
2499         d->block_last_index[i]= s->block_last_index[i];
2500     d->interlaced_dct= s->interlaced_dct;
2501     d->qscale= s->qscale;
2502
2503     d->esc3_level_length= s->esc3_level_length;
2504 }
2505
2506 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2507                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2508                            int *dmin, int *next_block, int motion_x, int motion_y)
2509 {
2510     int score;
2511     uint8_t *dest_backup[3];
2512
2513     copy_context_before_encode(s, backup, type);
2514
2515     s->block= s->blocks[*next_block];
2516     s->pb= pb[*next_block];
2517     if(s->data_partitioning){
2518         s->pb2   = pb2   [*next_block];
2519         s->tex_pb= tex_pb[*next_block];
2520     }
2521
2522     if(*next_block){
2523         memcpy(dest_backup, s->dest, sizeof(s->dest));
2524         s->dest[0] = s->sc.rd_scratchpad;
2525         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2526         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2527         av_assert0(s->linesize >= 32); //FIXME
2528     }
2529
2530     encode_mb(s, motion_x, motion_y);
2531
2532     score= put_bits_count(&s->pb);
2533     if(s->data_partitioning){
2534         score+= put_bits_count(&s->pb2);
2535         score+= put_bits_count(&s->tex_pb);
2536     }
2537
2538     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2539         ff_mpv_decode_mb(s, s->block);
2540
2541         score *= s->lambda2;
2542         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2543     }
2544
2545     if(*next_block){
2546         memcpy(s->dest, dest_backup, sizeof(s->dest));
2547     }
2548
2549     if(score<*dmin){
2550         *dmin= score;
2551         *next_block^=1;
2552
2553         copy_context_after_encode(best, s, type);
2554     }
2555 }
2556
2557 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2558     uint32_t *sq = ff_square_tab + 256;
2559     int acc=0;
2560     int x,y;
2561
2562     if(w==16 && h==16)
2563         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2564     else if(w==8 && h==8)
2565         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2566
2567     for(y=0; y<h; y++){
2568         for(x=0; x<w; x++){
2569             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2570         }
2571     }
2572
2573     av_assert2(acc>=0);
2574
2575     return acc;
2576 }
2577
2578 static int sse_mb(MpegEncContext *s){
2579     int w= 16;
2580     int h= 16;
2581
2582     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2583     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2584
2585     if(w==16 && h==16)
2586       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2587         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2588                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2589                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2590       }else{
2591         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2592                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2593                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2594       }
2595     else
2596         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2597                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2598                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2599 }
2600
2601 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2602     MpegEncContext *s= *(void**)arg;
2603
2604
2605     s->me.pre_pass=1;
2606     s->me.dia_size= s->avctx->pre_dia_size;
2607     s->first_slice_line=1;
2608     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2609         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2610             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2611         }
2612         s->first_slice_line=0;
2613     }
2614
2615     s->me.pre_pass=0;
2616
2617     return 0;
2618 }
2619
2620 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2621     MpegEncContext *s= *(void**)arg;
2622
2623     ff_check_alignment();
2624
2625     s->me.dia_size= s->avctx->dia_size;
2626     s->first_slice_line=1;
2627     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2628         s->mb_x=0; //for block init below
2629         ff_init_block_index(s);
2630         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2631             s->block_index[0]+=2;
2632             s->block_index[1]+=2;
2633             s->block_index[2]+=2;
2634             s->block_index[3]+=2;
2635
2636             /* compute motion vector & mb_type and store in context */
2637             if(s->pict_type==AV_PICTURE_TYPE_B)
2638                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2639             else
2640                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2641         }
2642         s->first_slice_line=0;
2643     }
2644     return 0;
2645 }
2646
2647 static int mb_var_thread(AVCodecContext *c, void *arg){
2648     MpegEncContext *s= *(void**)arg;
2649     int mb_x, mb_y;
2650
2651     ff_check_alignment();
2652
2653     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2654         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2655             int xx = mb_x * 16;
2656             int yy = mb_y * 16;
2657             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2658             int varc;
2659             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2660
2661             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2662                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2663
2664             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2665             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2666             s->me.mb_var_sum_temp    += varc;
2667         }
2668     }
2669     return 0;
2670 }
2671
2672 static void write_slice_end(MpegEncContext *s){
2673     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2674         if(s->partitioned_frame){
2675             ff_mpeg4_merge_partitions(s);
2676         }
2677
2678         ff_mpeg4_stuffing(&s->pb);
2679     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2680         ff_mjpeg_encode_stuffing(s);
2681     }
2682
2683     avpriv_align_put_bits(&s->pb);
2684     flush_put_bits(&s->pb);
2685
2686     if ((s->avctx->flags & CODEC_FLAG_PASS1) && !s->partitioned_frame)
2687         s->misc_bits+= get_bits_diff(s);
2688 }
2689
2690 static void write_mb_info(MpegEncContext *s)
2691 {
2692     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2693     int offset = put_bits_count(&s->pb);
2694     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2695     int gobn = s->mb_y / s->gob_index;
2696     int pred_x, pred_y;
2697     if (CONFIG_H263_ENCODER)
2698         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2699     bytestream_put_le32(&ptr, offset);
2700     bytestream_put_byte(&ptr, s->qscale);
2701     bytestream_put_byte(&ptr, gobn);
2702     bytestream_put_le16(&ptr, mba);
2703     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2704     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2705     /* 4MV not implemented */
2706     bytestream_put_byte(&ptr, 0); /* hmv2 */
2707     bytestream_put_byte(&ptr, 0); /* vmv2 */
2708 }
2709
2710 static void update_mb_info(MpegEncContext *s, int startcode)
2711 {
2712     if (!s->mb_info)
2713         return;
2714     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2715         s->mb_info_size += 12;
2716         s->prev_mb_info = s->last_mb_info;
2717     }
2718     if (startcode) {
2719         s->prev_mb_info = put_bits_count(&s->pb)/8;
2720         /* This might have incremented mb_info_size above, and we return without
2721          * actually writing any info into that slot yet. But in that case,
2722          * this will be called again at the start of the after writing the
2723          * start code, actually writing the mb info. */
2724         return;
2725     }
2726
2727     s->last_mb_info = put_bits_count(&s->pb)/8;
2728     if (!s->mb_info_size)
2729         s->mb_info_size += 12;
2730     write_mb_info(s);
2731 }
2732
2733 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2734 {
2735     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2736         && s->slice_context_count == 1
2737         && s->pb.buf == s->avctx->internal->byte_buffer) {
2738         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2739         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2740
2741         uint8_t *new_buffer = NULL;
2742         int new_buffer_size = 0;
2743
2744         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2745                               s->avctx->internal->byte_buffer_size + size_increase);
2746         if (!new_buffer)
2747             return AVERROR(ENOMEM);
2748
2749         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2750         av_free(s->avctx->internal->byte_buffer);
2751         s->avctx->internal->byte_buffer      = new_buffer;
2752         s->avctx->internal->byte_buffer_size = new_buffer_size;
2753         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2754         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2755         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2756     }
2757     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2758         return AVERROR(EINVAL);
2759     return 0;
2760 }
2761
2762 static int encode_thread(AVCodecContext *c, void *arg){
2763     MpegEncContext *s= *(void**)arg;
2764     int mb_x, mb_y, pdif = 0;
2765     int chr_h= 16>>s->chroma_y_shift;
2766     int i, j;
2767     MpegEncContext best_s = { 0 }, backup_s;
2768     uint8_t bit_buf[2][MAX_MB_BYTES];
2769     uint8_t bit_buf2[2][MAX_MB_BYTES];
2770     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2771     PutBitContext pb[2], pb2[2], tex_pb[2];
2772
2773     ff_check_alignment();
2774
2775     for(i=0; i<2; i++){
2776         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2777         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2778         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2779     }
2780
2781     s->last_bits= put_bits_count(&s->pb);
2782     s->mv_bits=0;
2783     s->misc_bits=0;
2784     s->i_tex_bits=0;
2785     s->p_tex_bits=0;
2786     s->i_count=0;
2787     s->f_count=0;
2788     s->b_count=0;
2789     s->skip_count=0;
2790
2791     for(i=0; i<3; i++){
2792         /* init last dc values */
2793         /* note: quant matrix value (8) is implied here */
2794         s->last_dc[i] = 128 << s->intra_dc_precision;
2795
2796         s->current_picture.error[i] = 0;
2797     }
2798     if(s->codec_id==AV_CODEC_ID_AMV){
2799         s->last_dc[0] = 128*8/13;
2800         s->last_dc[1] = 128*8/14;
2801         s->last_dc[2] = 128*8/14;
2802     }
2803     s->mb_skip_run = 0;
2804     memset(s->last_mv, 0, sizeof(s->last_mv));
2805
2806     s->last_mv_dir = 0;
2807
2808     switch(s->codec_id){
2809     case AV_CODEC_ID_H263:
2810     case AV_CODEC_ID_H263P:
2811     case AV_CODEC_ID_FLV1:
2812         if (CONFIG_H263_ENCODER)
2813             s->gob_index = H263_GOB_HEIGHT(s->height);
2814         break;
2815     case AV_CODEC_ID_MPEG4:
2816         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2817             ff_mpeg4_init_partitions(s);
2818         break;
2819     }
2820
2821     s->resync_mb_x=0;
2822     s->resync_mb_y=0;
2823     s->first_slice_line = 1;
2824     s->ptr_lastgob = s->pb.buf;
2825     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2826         s->mb_x=0;
2827         s->mb_y= mb_y;
2828
2829         ff_set_qscale(s, s->qscale);
2830         ff_init_block_index(s);
2831
2832         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2833             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2834             int mb_type= s->mb_type[xy];
2835 //            int d;
2836             int dmin= INT_MAX;
2837             int dir;
2838             int size_increase =  s->avctx->internal->byte_buffer_size/4
2839                                + s->mb_width*MAX_MB_BYTES;
2840
2841             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2842             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2843                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2844                 return -1;
2845             }
2846             if(s->data_partitioning){
2847                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2848                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2849                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2850                     return -1;
2851                 }
2852             }
2853
2854             s->mb_x = mb_x;
2855             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2856             ff_update_block_index(s);
2857
2858             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2859                 ff_h261_reorder_mb_index(s);
2860                 xy= s->mb_y*s->mb_stride + s->mb_x;
2861                 mb_type= s->mb_type[xy];
2862             }
2863
2864             /* write gob / video packet header  */
2865             if(s->rtp_mode){
2866                 int current_packet_size, is_gob_start;
2867
2868                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2869
2870                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2871
2872                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2873
2874                 switch(s->codec_id){
2875                 case AV_CODEC_ID_H263:
2876                 case AV_CODEC_ID_H263P:
2877                     if(!s->h263_slice_structured)
2878                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2879                     break;
2880                 case AV_CODEC_ID_MPEG2VIDEO:
2881                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2882                 case AV_CODEC_ID_MPEG1VIDEO:
2883                     if(s->mb_skip_run) is_gob_start=0;
2884                     break;
2885                 case AV_CODEC_ID_MJPEG:
2886                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2887                     break;
2888                 }
2889
2890                 if(is_gob_start){
2891                     if(s->start_mb_y != mb_y || mb_x!=0){
2892                         write_slice_end(s);
2893
2894                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2895                             ff_mpeg4_init_partitions(s);
2896                         }
2897                     }
2898
2899                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2900                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2901
2902                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2903                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2904                         int d = 100 / s->error_rate;
2905                         if(r % d == 0){
2906                             current_packet_size=0;
2907                             s->pb.buf_ptr= s->ptr_lastgob;
2908                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2909                         }
2910                     }
2911
2912                     if (s->avctx->rtp_callback){
2913                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2914                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2915                     }
2916                     update_mb_info(s, 1);
2917
2918                     switch(s->codec_id){
2919                     case AV_CODEC_ID_MPEG4:
2920                         if (CONFIG_MPEG4_ENCODER) {
2921                             ff_mpeg4_encode_video_packet_header(s);
2922                             ff_mpeg4_clean_buffers(s);
2923                         }
2924                     break;
2925                     case AV_CODEC_ID_MPEG1VIDEO:
2926                     case AV_CODEC_ID_MPEG2VIDEO:
2927                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2928                             ff_mpeg1_encode_slice_header(s);
2929                             ff_mpeg1_clean_buffers(s);
2930                         }
2931                     break;
2932                     case AV_CODEC_ID_H263:
2933                     case AV_CODEC_ID_H263P:
2934                         if (CONFIG_H263_ENCODER)
2935                             ff_h263_encode_gob_header(s, mb_y);
2936                     break;
2937                     }
2938
2939                     if (s->avctx->flags & CODEC_FLAG_PASS1) {
2940                         int bits= put_bits_count(&s->pb);
2941                         s->misc_bits+= bits - s->last_bits;
2942                         s->last_bits= bits;
2943                     }
2944
2945                     s->ptr_lastgob += current_packet_size;
2946                     s->first_slice_line=1;
2947                     s->resync_mb_x=mb_x;
2948                     s->resync_mb_y=mb_y;
2949                 }
2950             }
2951
2952             if(  (s->resync_mb_x   == s->mb_x)
2953                && s->resync_mb_y+1 == s->mb_y){
2954                 s->first_slice_line=0;
2955             }
2956
2957             s->mb_skipped=0;
2958             s->dquant=0; //only for QP_RD
2959
2960             update_mb_info(s, 0);
2961
2962             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2963                 int next_block=0;
2964                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2965
2966                 copy_context_before_encode(&backup_s, s, -1);
2967                 backup_s.pb= s->pb;
2968                 best_s.data_partitioning= s->data_partitioning;
2969                 best_s.partitioned_frame= s->partitioned_frame;
2970                 if(s->data_partitioning){
2971                     backup_s.pb2= s->pb2;
2972                     backup_s.tex_pb= s->tex_pb;
2973                 }
2974
2975                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2976                     s->mv_dir = MV_DIR_FORWARD;
2977                     s->mv_type = MV_TYPE_16X16;
2978                     s->mb_intra= 0;
2979                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2980                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2981                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2982                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2983                 }
2984                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2985                     s->mv_dir = MV_DIR_FORWARD;
2986                     s->mv_type = MV_TYPE_FIELD;
2987                     s->mb_intra= 0;
2988                     for(i=0; i<2; i++){
2989                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2990                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2991                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2992                     }
2993                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2994                                  &dmin, &next_block, 0, 0);
2995                 }
2996                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2997                     s->mv_dir = MV_DIR_FORWARD;
2998                     s->mv_type = MV_TYPE_16X16;
2999                     s->mb_intra= 0;
3000                     s->mv[0][0][0] = 0;
3001                     s->mv[0][0][1] = 0;
3002                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3003                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3004                 }
3005                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3006                     s->mv_dir = MV_DIR_FORWARD;
3007                     s->mv_type = MV_TYPE_8X8;
3008                     s->mb_intra= 0;
3009                     for(i=0; i<4; i++){
3010                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3011                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3012                     }
3013                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3014                                  &dmin, &next_block, 0, 0);
3015                 }
3016                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3017                     s->mv_dir = MV_DIR_FORWARD;
3018                     s->mv_type = MV_TYPE_16X16;
3019                     s->mb_intra= 0;
3020                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3021                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3022                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3023                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3024                 }
3025                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3026                     s->mv_dir = MV_DIR_BACKWARD;
3027                     s->mv_type = MV_TYPE_16X16;
3028                     s->mb_intra= 0;
3029                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3030                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3031                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3032                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3033                 }
3034                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3035                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3036                     s->mv_type = MV_TYPE_16X16;
3037                     s->mb_intra= 0;
3038                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3039                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3040                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3041                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3042                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3043                                  &dmin, &next_block, 0, 0);
3044                 }
3045                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3046                     s->mv_dir = MV_DIR_FORWARD;
3047                     s->mv_type = MV_TYPE_FIELD;
3048                     s->mb_intra= 0;
3049                     for(i=0; i<2; i++){
3050                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3051                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3052                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3053                     }
3054                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3055                                  &dmin, &next_block, 0, 0);
3056                 }
3057                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3058                     s->mv_dir = MV_DIR_BACKWARD;
3059                     s->mv_type = MV_TYPE_FIELD;
3060                     s->mb_intra= 0;
3061                     for(i=0; i<2; i++){
3062                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3063                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3064                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3065                     }
3066                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3067                                  &dmin, &next_block, 0, 0);
3068                 }
3069                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3070                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3071                     s->mv_type = MV_TYPE_FIELD;
3072                     s->mb_intra= 0;
3073                     for(dir=0; dir<2; dir++){
3074                         for(i=0; i<2; i++){
3075                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3076                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3077                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3078                         }
3079                     }
3080                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3081                                  &dmin, &next_block, 0, 0);
3082                 }
3083                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3084                     s->mv_dir = 0;
3085                     s->mv_type = MV_TYPE_16X16;
3086                     s->mb_intra= 1;
3087                     s->mv[0][0][0] = 0;
3088                     s->mv[0][0][1] = 0;
3089                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3090                                  &dmin, &next_block, 0, 0);
3091                     if(s->h263_pred || s->h263_aic){
3092                         if(best_s.mb_intra)
3093                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3094                         else
3095                             ff_clean_intra_table_entries(s); //old mode?
3096                     }
3097                 }
3098
3099                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3100                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3101                         const int last_qp= backup_s.qscale;
3102                         int qpi, qp, dc[6];
3103                         int16_t ac[6][16];
3104                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3105                         static const int dquant_tab[4]={-1,1,-2,2};
3106                         int storecoefs = s->mb_intra && s->dc_val[0];
3107
3108                         av_assert2(backup_s.dquant == 0);
3109
3110                         //FIXME intra
3111                         s->mv_dir= best_s.mv_dir;
3112                         s->mv_type = MV_TYPE_16X16;
3113                         s->mb_intra= best_s.mb_intra;
3114                         s->mv[0][0][0] = best_s.mv[0][0][0];
3115                         s->mv[0][0][1] = best_s.mv[0][0][1];
3116                         s->mv[1][0][0] = best_s.mv[1][0][0];
3117                         s->mv[1][0][1] = best_s.mv[1][0][1];
3118
3119                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3120                         for(; qpi<4; qpi++){
3121                             int dquant= dquant_tab[qpi];
3122                             qp= last_qp + dquant;
3123                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3124                                 continue;
3125                             backup_s.dquant= dquant;
3126                             if(storecoefs){
3127                                 for(i=0; i<6; i++){
3128                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3129                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3130                                 }
3131                             }
3132
3133                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3134                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3135                             if(best_s.qscale != qp){
3136                                 if(storecoefs){
3137                                     for(i=0; i<6; i++){
3138                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3139                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3140                                     }
3141                                 }
3142                             }
3143                         }
3144                     }
3145                 }
3146                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3147                     int mx= s->b_direct_mv_table[xy][0];
3148                     int my= s->b_direct_mv_table[xy][1];
3149
3150                     backup_s.dquant = 0;
3151                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3152                     s->mb_intra= 0;
3153                     ff_mpeg4_set_direct_mv(s, mx, my);
3154                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3155                                  &dmin, &next_block, mx, my);
3156                 }
3157                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3158                     backup_s.dquant = 0;
3159                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3160                     s->mb_intra= 0;
3161                     ff_mpeg4_set_direct_mv(s, 0, 0);
3162                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3163                                  &dmin, &next_block, 0, 0);
3164                 }
3165                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3166                     int coded=0;
3167                     for(i=0; i<6; i++)
3168                         coded |= s->block_last_index[i];
3169                     if(coded){
3170                         int mx,my;
3171                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3172                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3173                             mx=my=0; //FIXME find the one we actually used
3174                             ff_mpeg4_set_direct_mv(s, mx, my);
3175                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3176                             mx= s->mv[1][0][0];
3177                             my= s->mv[1][0][1];
3178                         }else{
3179                             mx= s->mv[0][0][0];
3180                             my= s->mv[0][0][1];
3181                         }
3182
3183                         s->mv_dir= best_s.mv_dir;
3184                         s->mv_type = best_s.mv_type;
3185                         s->mb_intra= 0;
3186 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3187                         s->mv[0][0][1] = best_s.mv[0][0][1];
3188                         s->mv[1][0][0] = best_s.mv[1][0][0];
3189                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3190                         backup_s.dquant= 0;
3191                         s->skipdct=1;
3192                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3193                                         &dmin, &next_block, mx, my);
3194                         s->skipdct=0;
3195                     }
3196                 }
3197
3198                 s->current_picture.qscale_table[xy] = best_s.qscale;
3199
3200                 copy_context_after_encode(s, &best_s, -1);
3201
3202                 pb_bits_count= put_bits_count(&s->pb);
3203                 flush_put_bits(&s->pb);
3204                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3205                 s->pb= backup_s.pb;
3206
3207                 if(s->data_partitioning){
3208                     pb2_bits_count= put_bits_count(&s->pb2);
3209                     flush_put_bits(&s->pb2);
3210                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3211                     s->pb2= backup_s.pb2;
3212
3213                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3214                     flush_put_bits(&s->tex_pb);
3215                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3216                     s->tex_pb= backup_s.tex_pb;
3217                 }
3218                 s->last_bits= put_bits_count(&s->pb);
3219
3220                 if (CONFIG_H263_ENCODER &&
3221                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3222                     ff_h263_update_motion_val(s);
3223
3224                 if(next_block==0){ //FIXME 16 vs linesize16
3225                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3226                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3227                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3228                 }
3229
3230                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3231                     ff_mpv_decode_mb(s, s->block);
3232             } else {
3233                 int motion_x = 0, motion_y = 0;
3234                 s->mv_type=MV_TYPE_16X16;
3235                 // only one MB-Type possible
3236
3237                 switch(mb_type){
3238                 case CANDIDATE_MB_TYPE_INTRA:
3239                     s->mv_dir = 0;
3240                     s->mb_intra= 1;
3241                     motion_x= s->mv[0][0][0] = 0;
3242                     motion_y= s->mv[0][0][1] = 0;
3243                     break;
3244                 case CANDIDATE_MB_TYPE_INTER:
3245                     s->mv_dir = MV_DIR_FORWARD;
3246                     s->mb_intra= 0;
3247                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3248                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3249                     break;
3250                 case CANDIDATE_MB_TYPE_INTER_I:
3251                     s->mv_dir = MV_DIR_FORWARD;
3252                     s->mv_type = MV_TYPE_FIELD;
3253                     s->mb_intra= 0;
3254                     for(i=0; i<2; i++){
3255                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3256                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3257                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3258                     }
3259                     break;
3260                 case CANDIDATE_MB_TYPE_INTER4V:
3261                     s->mv_dir = MV_DIR_FORWARD;
3262                     s->mv_type = MV_TYPE_8X8;
3263                     s->mb_intra= 0;
3264                     for(i=0; i<4; i++){
3265                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3266                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3267                     }
3268                     break;
3269                 case CANDIDATE_MB_TYPE_DIRECT:
3270                     if (CONFIG_MPEG4_ENCODER) {
3271                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3272                         s->mb_intra= 0;
3273                         motion_x=s->b_direct_mv_table[xy][0];
3274                         motion_y=s->b_direct_mv_table[xy][1];
3275                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3276                     }
3277                     break;
3278                 case CANDIDATE_MB_TYPE_DIRECT0:
3279                     if (CONFIG_MPEG4_ENCODER) {
3280                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3281                         s->mb_intra= 0;
3282                         ff_mpeg4_set_direct_mv(s, 0, 0);
3283                     }
3284                     break;
3285                 case CANDIDATE_MB_TYPE_BIDIR:
3286                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3287                     s->mb_intra= 0;
3288                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3289                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3290                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3291                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3292                     break;
3293                 case CANDIDATE_MB_TYPE_BACKWARD:
3294                     s->mv_dir = MV_DIR_BACKWARD;
3295                     s->mb_intra= 0;
3296                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3297                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3298                     break;
3299                 case CANDIDATE_MB_TYPE_FORWARD:
3300                     s->mv_dir = MV_DIR_FORWARD;
3301                     s->mb_intra= 0;
3302                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3303                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3304                     break;
3305                 case CANDIDATE_MB_TYPE_FORWARD_I:
3306                     s->mv_dir = MV_DIR_FORWARD;
3307                     s->mv_type = MV_TYPE_FIELD;
3308                     s->mb_intra= 0;
3309                     for(i=0; i<2; i++){
3310                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3311                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3312                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3313                     }
3314                     break;
3315                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3316                     s->mv_dir = MV_DIR_BACKWARD;
3317                     s->mv_type = MV_TYPE_FIELD;
3318                     s->mb_intra= 0;
3319                     for(i=0; i<2; i++){
3320                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3321                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3322                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3323                     }
3324                     break;
3325                 case CANDIDATE_MB_TYPE_BIDIR_I:
3326                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3327                     s->mv_type = MV_TYPE_FIELD;
3328                     s->mb_intra= 0;
3329                     for(dir=0; dir<2; dir++){
3330                         for(i=0; i<2; i++){
3331                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3332                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3333                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3334                         }
3335                     }
3336                     break;
3337                 default:
3338                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3339                 }
3340
3341                 encode_mb(s, motion_x, motion_y);
3342
3343                 // RAL: Update last macroblock type
3344                 s->last_mv_dir = s->mv_dir;
3345
3346                 if (CONFIG_H263_ENCODER &&
3347                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3348                     ff_h263_update_motion_val(s);
3349
3350                 ff_mpv_decode_mb(s, s->block);
3351             }
3352
3353             /* clean the MV table in IPS frames for direct mode in B frames */
3354             if(s->mb_intra /* && I,P,S_TYPE */){
3355                 s->p_mv_table[xy][0]=0;
3356                 s->p_mv_table[xy][1]=0;
3357             }
3358
3359             if (s->avctx->flags & CODEC_FLAG_PSNR) {
3360                 int w= 16;
3361                 int h= 16;
3362
3363                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3364                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3365
3366                 s->current_picture.error[0] += sse(
3367                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3368                     s->dest[0], w, h, s->linesize);
3369                 s->current_picture.error[1] += sse(
3370                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3371                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3372                 s->current_picture.error[2] += sse(
3373                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3374                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3375             }
3376             if(s->loop_filter){
3377                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3378                     ff_h263_loop_filter(s);
3379             }
3380             ff_dlog(s->avctx, "MB %d %d bits\n",
3381                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3382         }
3383     }
3384
3385     //not beautiful here but we must write it before flushing so it has to be here
3386     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3387         ff_msmpeg4_encode_ext_header(s);
3388
3389     write_slice_end(s);
3390
3391     /* Send the last GOB if RTP */
3392     if (s->avctx->rtp_callback) {
3393         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3394         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3395         /* Call the RTP callback to send the last GOB */
3396         emms_c();
3397         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3398     }
3399
3400     return 0;
3401 }
3402
3403 #define MERGE(field) dst->field += src->field; src->field=0
3404 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3405     MERGE(me.scene_change_score);
3406     MERGE(me.mc_mb_var_sum_temp);
3407     MERGE(me.mb_var_sum_temp);
3408 }
3409
3410 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3411     int i;
3412
3413     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3414     MERGE(dct_count[1]);
3415     MERGE(mv_bits);
3416     MERGE(i_tex_bits);
3417     MERGE(p_tex_bits);
3418     MERGE(i_count);
3419     MERGE(f_count);
3420     MERGE(b_count);
3421     MERGE(skip_count);
3422     MERGE(misc_bits);
3423     MERGE(er.error_count);
3424     MERGE(padding_bug_score);
3425     MERGE(current_picture.error[0]);
3426     MERGE(current_picture.error[1]);
3427     MERGE(current_picture.error[2]);
3428
3429     if(dst->avctx->noise_reduction){
3430         for(i=0; i<64; i++){
3431             MERGE(dct_error_sum[0][i]);
3432             MERGE(dct_error_sum[1][i]);
3433         }
3434     }
3435
3436     assert(put_bits_count(&src->pb) % 8 ==0);
3437     assert(put_bits_count(&dst->pb) % 8 ==0);
3438     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3439     flush_put_bits(&dst->pb);
3440 }
3441
3442 static int estimate_qp(MpegEncContext *s, int dry_run){
3443     if (s->next_lambda){
3444         s->current_picture_ptr->f->quality =
3445         s->current_picture.f->quality = s->next_lambda;
3446         if(!dry_run) s->next_lambda= 0;
3447     } else if (!s->fixed_qscale) {
3448         s->current_picture_ptr->f->quality =
3449         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3450         if (s->current_picture.f->quality < 0)
3451             return -1;
3452     }
3453
3454     if(s->adaptive_quant){
3455         switch(s->codec_id){
3456         case AV_CODEC_ID_MPEG4:
3457             if (CONFIG_MPEG4_ENCODER)
3458                 ff_clean_mpeg4_qscales(s);
3459             break;
3460         case AV_CODEC_ID_H263:
3461         case AV_CODEC_ID_H263P:
3462         case AV_CODEC_ID_FLV1:
3463             if (CONFIG_H263_ENCODER)
3464                 ff_clean_h263_qscales(s);
3465             break;
3466         default:
3467             ff_init_qscale_tab(s);
3468         }
3469
3470         s->lambda= s->lambda_table[0];
3471         //FIXME broken
3472     }else
3473         s->lambda = s->current_picture.f->quality;
3474     update_qscale(s);
3475     return 0;
3476 }
3477
3478 /* must be called before writing the header */
3479 static void set_frame_distances(MpegEncContext * s){
3480     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3481     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3482
3483     if(s->pict_type==AV_PICTURE_TYPE_B){
3484         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3485         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3486     }else{
3487         s->pp_time= s->time - s->last_non_b_time;
3488         s->last_non_b_time= s->time;
3489         assert(s->picture_number==0 || s->pp_time > 0);
3490     }
3491 }
3492
3493 static int encode_picture(MpegEncContext *s, int picture_number)
3494 {
3495     int i, ret;
3496     int bits;
3497     int context_count = s->slice_context_count;
3498
3499     s->picture_number = picture_number;
3500
3501     /* Reset the average MB variance */
3502     s->me.mb_var_sum_temp    =
3503     s->me.mc_mb_var_sum_temp = 0;
3504
3505     /* we need to initialize some time vars before we can encode b-frames */
3506     // RAL: Condition added for MPEG1VIDEO
3507     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3508         set_frame_distances(s);
3509     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3510         ff_set_mpeg4_time(s);
3511
3512     s->me.scene_change_score=0;
3513
3514 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3515
3516     if(s->pict_type==AV_PICTURE_TYPE_I){
3517         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3518         else                        s->no_rounding=0;
3519     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3520         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3521             s->no_rounding ^= 1;
3522     }
3523
3524     if (s->avctx->flags & CODEC_FLAG_PASS2) {
3525         if (estimate_qp(s,1) < 0)
3526             return -1;
3527         ff_get_2pass_fcode(s);
3528     } else if (!(s->avctx->flags & CODEC_FLAG_QSCALE)) {
3529         if(s->pict_type==AV_PICTURE_TYPE_B)
3530             s->lambda= s->last_lambda_for[s->pict_type];
3531         else
3532             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3533         update_qscale(s);
3534     }
3535
3536     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3537         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3538         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3539         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3540         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3541     }
3542
3543     s->mb_intra=0; //for the rate distortion & bit compare functions
3544     for(i=1; i<context_count; i++){
3545         ret = ff_update_duplicate_context(s->thread_context[i], s);
3546         if (ret < 0)
3547             return ret;
3548     }
3549
3550     if(ff_init_me(s)<0)
3551         return -1;
3552
3553     /* Estimate motion for every MB */
3554     if(s->pict_type != AV_PICTURE_TYPE_I){
3555         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3556         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3557         if (s->pict_type != AV_PICTURE_TYPE_B) {
3558             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3559                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3560             }
3561         }
3562
3563         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3564     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3565         /* I-Frame */
3566         for(i=0; i<s->mb_stride*s->mb_height; i++)
3567             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3568
3569         if(!s->fixed_qscale){
3570             /* finding spatial complexity for I-frame rate control */
3571             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3572         }
3573     }
3574     for(i=1; i<context_count; i++){
3575         merge_context_after_me(s, s->thread_context[i]);
3576     }
3577     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3578     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3579     emms_c();
3580
3581     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3582         s->pict_type= AV_PICTURE_TYPE_I;
3583         for(i=0; i<s->mb_stride*s->mb_height; i++)
3584             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3585         if(s->msmpeg4_version >= 3)
3586             s->no_rounding=1;
3587         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3588                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3589     }
3590
3591     if(!s->umvplus){
3592         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3593             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3594
3595             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3596                 int a,b;
3597                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3598                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3599                 s->f_code= FFMAX3(s->f_code, a, b);
3600             }
3601
3602             ff_fix_long_p_mvs(s);
3603             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3604             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3605                 int j;
3606                 for(i=0; i<2; i++){
3607                     for(j=0; j<2; j++)
3608                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3609                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3610                 }
3611             }
3612         }
3613
3614         if(s->pict_type==AV_PICTURE_TYPE_B){
3615             int a, b;
3616
3617             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3618             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3619             s->f_code = FFMAX(a, b);
3620
3621             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3622             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3623             s->b_code = FFMAX(a, b);
3624
3625             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3626             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3627             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3628             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3629             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3630                 int dir, j;
3631                 for(dir=0; dir<2; dir++){
3632                     for(i=0; i<2; i++){
3633                         for(j=0; j<2; j++){
3634                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3635                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3636                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3637                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3638                         }
3639                     }
3640                 }
3641             }
3642         }
3643     }
3644
3645     if (estimate_qp(s, 0) < 0)
3646         return -1;
3647
3648     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3649         s->pict_type == AV_PICTURE_TYPE_I &&
3650         !(s->avctx->flags & CODEC_FLAG_QSCALE))
3651         s->qscale= 3; //reduce clipping problems
3652
3653     if (s->out_format == FMT_MJPEG) {
3654         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3655         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3656
3657         if (s->avctx->intra_matrix) {
3658             chroma_matrix =
3659             luma_matrix = s->avctx->intra_matrix;
3660         }
3661         if (s->avctx->chroma_intra_matrix)
3662             chroma_matrix = s->avctx->chroma_intra_matrix;
3663
3664         /* for mjpeg, we do include qscale in the matrix */
3665         for(i=1;i<64;i++){
3666             int j = s->idsp.idct_permutation[i];
3667
3668             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3669             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3670         }
3671         s->y_dc_scale_table=
3672         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3673         s->chroma_intra_matrix[0] =
3674         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3675         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3676                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3677         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3678                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3679         s->qscale= 8;
3680     }
3681     if(s->codec_id == AV_CODEC_ID_AMV){
3682         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3683         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3684         for(i=1;i<64;i++){
3685             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3686
3687             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3688             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3689         }
3690         s->y_dc_scale_table= y;
3691         s->c_dc_scale_table= c;
3692         s->intra_matrix[0] = 13;
3693         s->chroma_intra_matrix[0] = 14;
3694         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3695                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3696         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3697                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3698         s->qscale= 8;
3699     }
3700
3701     //FIXME var duplication
3702     s->current_picture_ptr->f->key_frame =
3703     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3704     s->current_picture_ptr->f->pict_type =
3705     s->current_picture.f->pict_type = s->pict_type;
3706
3707     if (s->current_picture.f->key_frame)
3708         s->picture_in_gop_number=0;
3709
3710     s->mb_x = s->mb_y = 0;
3711     s->last_bits= put_bits_count(&s->pb);
3712     switch(s->out_format) {
3713     case FMT_MJPEG:
3714         if (CONFIG_MJPEG_ENCODER)
3715             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3716                                            s->intra_matrix, s->chroma_intra_matrix);
3717         break;
3718     case FMT_H261:
3719         if (CONFIG_H261_ENCODER)
3720             ff_h261_encode_picture_header(s, picture_number);
3721         break;
3722     case FMT_H263:
3723         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3724             ff_wmv2_encode_picture_header(s, picture_number);
3725         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3726             ff_msmpeg4_encode_picture_header(s, picture_number);
3727         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3728             ff_mpeg4_encode_picture_header(s, picture_number);
3729         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3730             ret = ff_rv10_encode_picture_header(s, picture_number);
3731             if (ret < 0)
3732                 return ret;
3733         }
3734         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3735             ff_rv20_encode_picture_header(s, picture_number);
3736         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3737             ff_flv_encode_picture_header(s, picture_number);
3738         else if (CONFIG_H263_ENCODER)
3739             ff_h263_encode_picture_header(s, picture_number);
3740         break;
3741     case FMT_MPEG1:
3742         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3743             ff_mpeg1_encode_picture_header(s, picture_number);
3744         break;
3745     default:
3746         av_assert0(0);
3747     }
3748     bits= put_bits_count(&s->pb);
3749     s->header_bits= bits - s->last_bits;
3750
3751     for(i=1; i<context_count; i++){
3752         update_duplicate_context_after_me(s->thread_context[i], s);
3753     }
3754     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3755     for(i=1; i<context_count; i++){
3756         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3757             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3758         merge_context_after_encode(s, s->thread_context[i]);
3759     }
3760     emms_c();
3761     return 0;
3762 }
3763
3764 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3765     const int intra= s->mb_intra;
3766     int i;
3767
3768     s->dct_count[intra]++;
3769
3770     for(i=0; i<64; i++){
3771         int level= block[i];
3772
3773         if(level){
3774             if(level>0){
3775                 s->dct_error_sum[intra][i] += level;
3776                 level -= s->dct_offset[intra][i];
3777                 if(level<0) level=0;
3778             }else{
3779                 s->dct_error_sum[intra][i] -= level;
3780                 level += s->dct_offset[intra][i];
3781                 if(level>0) level=0;
3782             }
3783             block[i]= level;
3784         }
3785     }
3786 }
3787
/**
 * Forward-transform and quantize one 8x8 block, selecting the coded levels
 * with a rate-distortion search ("trellis") instead of plain rounding.
 *
 * The block is transformed in place with s->fdsp.fdct() (and passed through
 * s->denoise_dct() when s->dct_error_sum is set). For every coefficient up
 * to the last one that does not quantize to zero, up to two candidate levels
 * are considered; a dynamic-programming pass over the scan positions then
 * picks run/level pairs minimizing distortion + lambda * VLC length.
 *
 * @param s        encoder context (DCT, quantization matrices, VLC length
 *                 tables, lambda2, output format)
 * @param block    64 samples on input; on output the chosen levels, stored
 *                 at the positions given by s->intra_scantable.permutated
 * @param n        block index; n < 4 selects y_dc_scale / the intra matrix,
 *                 otherwise c_dc_scale / the chroma intra matrix
 * @param qscale   quantizer; selects the qmat row and is used to dequantize
 *                 candidate levels
 * @param overflow set to non-zero when a candidate level may exceed
 *                 s->max_qcoeff
 * @return scan index of the last non-zero coefficient that is kept; a value
 *         below the first searched index (0 for intra, -1 otherwise) means
 *         no coefficient from the search is coded
 */
3788 static int dct_quantize_trellis_c(MpegEncContext *s,
3789                                   int16_t *block, int n,
3790                                   int qscale, int *overflow){
3791     const int *qmat;
3792     const uint16_t *matrix;
3793     const uint8_t *scantable= s->intra_scantable.scantable;
3794     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3795     int max=0;
3796     unsigned int threshold1, threshold2;
3797     int bias=0;
3798     int run_tab[65];
3799     int level_tab[65];
3800     int score_tab[65];
3801     int survivor[65];
3802     int survivor_count;
3803     int last_run=0;
3804     int last_level=0;
3805     int last_score= 0;
3806     int last_i;
3807     int coeff[2][64];
3808     int coeff_count[64];
3809     int qmul, qadd, start_i, last_non_zero, i, dc;
3810     const int esc_length= s->ac_esc_length;
3811     uint8_t * length;
3812     uint8_t * last_length;
3813     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3814
3815     s->fdsp.fdct(block);
3816
3817     if(s->dct_error_sum)
3818         s->denoise_dct(s, block);
3819     qmul= qscale*16;
3820     qadd= ((qscale-1)|1)*8;
3821
         /* Intra blocks: quantize the DC coefficient here and search only
          * positions 1..63; other blocks search all positions. */
3822     if (s->mb_intra) {
3823         int q;
3824         if (!s->h263_aic) {
3825             if (n < 4)
3826                 q = s->y_dc_scale;
3827             else
3828                 q = s->c_dc_scale;
3829             q = q << 3;
3830         } else{
3831             /* For AIC we skip quant/dequant of INTRADC */
3832             q = 1 << 3;
3833             qadd=0;
3834         }
3835
3836         /* note: block[0] is assumed to be positive */
3837         block[0] = (block[0] + (q >> 1)) / q;
3838         start_i = 1;
3839         last_non_zero = 0;
3840         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3841         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3842         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3843             bias= 1<<(QMAT_SHIFT-1);
3844
3845         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3846             length     = s->intra_chroma_ac_vlc_length;
3847             last_length= s->intra_chroma_ac_vlc_last_length;
3848         } else {
3849             length     = s->intra_ac_vlc_length;
3850             last_length= s->intra_ac_vlc_last_length;
3851         }
3852     } else {
3853         start_i = 0;
3854         last_non_zero = -1;
3855         qmat = s->q_inter_matrix[qscale];
3856         matrix = s->inter_matrix;
3857         length     = s->inter_ac_vlc_length;
3858         last_length= s->inter_ac_vlc_last_length;
3859     }
3860     last_i= start_i;
3861
         /* (unsigned)(level+threshold1) > threshold2 holds exactly when
          * level lies outside [-threshold1, threshold1]. */
3862     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3863     threshold2= (threshold1<<1);
3864
         /* Scan backwards for the last coefficient that passes the
          * threshold test. */
3865     for(i=63; i>=start_i; i--) {
3866         const int j = scantable[i];
3867         int level = block[j] * qmat[j];
3868
3869         if(((unsigned)(level+threshold1))>threshold2){
3870             last_non_zero = i;
3871             break;
3872         }
3873     }
3874
         /* Collect the candidate levels per position: the rounded level and,
          * when its magnitude is at least 2, the one next closer to zero.
          * Positions failing the threshold test get a single +-1 candidate
          * carrying the sign of the coefficient. */
3875     for(i=start_i; i<=last_non_zero; i++) {
3876         const int j = scantable[i];
3877         int level = block[j] * qmat[j];
3878
3879 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3880 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3881         if(((unsigned)(level+threshold1))>threshold2){
3882             if(level>0){
3883                 level= (bias + level)>>QMAT_SHIFT;
3884                 coeff[0][i]= level;
3885                 coeff[1][i]= level-1;
3886 //                coeff[2][k]= level-2;
3887             }else{
3888                 level= (bias - level)>>QMAT_SHIFT;
3889                 coeff[0][i]= -level;
3890                 coeff[1][i]= -level+1;
3891 //                coeff[2][k]= -level+2;
3892             }
3893             coeff_count[i]= FFMIN(level, 2);
3894             av_assert2(coeff_count[i]);
3895             max |=level;
3896         }else{
3897             coeff[0][i]= (level>>31)|1;
3898             coeff_count[i]= 1;
3899         }
3900     }
3901
3902     *overflow= s->max_qcoeff < max; //overflow might have happened
3903
         /* Nothing passed the threshold: clear the searched range and stop. */
3904     if(last_non_zero < start_i){
3905         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3906         return last_non_zero;
3907     }
3908
         /* Dynamic programming over scan positions. score_tab[i+1] is the
          * best score found for coding a non-zero level at position i;
          * survivor[] lists the earlier end points still considered as the
          * start of the run leading to the current position. */
3909     score_tab[start_i]= 0;
3910     survivor[0]= start_i;
3911     survivor_count= 1;
3912
3913     for(i=start_i; i<=last_non_zero; i++){
3914         int level_index, j, zero_distortion;
3915         int dct_coeff= FFABS(block[ scantable[i] ]);
3916         int best_score=256*256*256*120;
3917
3918         if (s->fdsp.fdct == ff_fdct_ifast)
3919             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3920         zero_distortion= dct_coeff*dct_coeff;
3921
3922         for(level_index=0; level_index < coeff_count[i]; level_index++){
3923             int distortion;
3924             int level= coeff[level_index][i];
3925             const int alevel= FFABS(level);
3926             int unquant_coeff;
3927
3928             av_assert2(level);
3929
                 /* Dequantize the candidate the way the output format does,
                  * so the distortion is measured against the reconstruction. */
3930             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3931                 unquant_coeff= alevel*qmul + qadd;
3932             } else if(s->out_format == FMT_MJPEG) {
3933                 j = s->idsp.idct_permutation[scantable[i]];
3934                 unquant_coeff = alevel * matrix[j] * 8;
3935             }else{ //MPEG1
3936                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3937                 if(s->mb_intra){
3938                         unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
3939                         unquant_coeff =   (unquant_coeff - 1) | 1;
3940                 }else{
3941                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
3942                         unquant_coeff =   (unquant_coeff - 1) | 1;
3943                 }
3944                 unquant_coeff<<= 3;
3945             }
3946
                 /* Distortion is relative to coding this coefficient as zero. */
3947             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* Offset by 64: levels in [-64, 63] index the VLC length
                  * tables, anything else is charged the escape length. */
3948             level+=64;
3949             if((level&(~127)) == 0){
3950                 for(j=survivor_count-1; j>=0; j--){
3951                     int run= i - survivor[j];
3952                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3953                     score += score_tab[i-run];
3954
3955                     if(score < best_score){
3956                         best_score= score;
3957                         run_tab[i+1]= run;
3958                         level_tab[i+1]= level-64;
3959                     }
3960                 }
3961
                     /* H.263/H.261 use separate "last" length tables, so the
                      * best final coefficient is tracked during the pass. */
3962                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3963                     for(j=survivor_count-1; j>=0; j--){
3964                         int run= i - survivor[j];
3965                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3966                         score += score_tab[i-run];
3967                         if(score < last_score){
3968                             last_score= score;
3969                             last_run= run;
3970                             last_level= level-64;
3971                             last_i= i+1;
3972                         }
3973                     }
3974                 }
3975             }else{
3976                 distortion += esc_length*lambda;
3977                 for(j=survivor_count-1; j>=0; j--){
3978                     int run= i - survivor[j];
3979                     int score= distortion + score_tab[i-run];
3980
3981                     if(score < best_score){
3982                         best_score= score;
3983                         run_tab[i+1]= run;
3984                         level_tab[i+1]= level-64;
3985                     }
3986                 }
3987
3988                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3989                   for(j=survivor_count-1; j>=0; j--){
3990                         int run= i - survivor[j];
3991                         int score= distortion + score_tab[i-run];
3992                         if(score < last_score){
3993                             last_score= score;
3994                             last_run= run;
3995                             last_level= level-64;
3996                             last_i= i+1;
3997                         }
3998                     }
3999                 }
4000             }
4001         }
4002
4003         score_tab[i+1]= best_score;
4004
4005         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
             /* Drop trailing survivors whose score is worse than the new
              * best; a tolerance of lambda is allowed when last_non_zero > 27. */
4006         if(last_non_zero <= 27){
4007             for(; survivor_count; survivor_count--){
4008                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4009                     break;
4010             }
4011         }else{
4012             for(; survivor_count; survivor_count--){
4013                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4014                     break;
4015             }
4016         }
4017
4018         survivor[ survivor_count++ ]= i+1;
4019     }
4020
         /* Other formats: choose the end point afterwards as the minimum of
          * score_tab[], adding 2*lambda to every end point except index 0. */
4021     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4022         last_score= 256*256*256*120;
4023         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4024             int score= score_tab[i];
4025             if(i) score += lambda*2; //FIXME exacter?
4026
4027             if(score < last_score){
4028                 last_score= score;
4029                 last_i= i;
4030                 last_level= level_tab[i];
4031                 last_run= run_tab[i];
4032             }
4033         }
4034     }
4035
4036     s->coded_score[n] = last_score;
4037
         /* Save |block[0]| before the searched range is cleared; it is
          * needed by the single-coefficient case below. */
4038     dc= FFABS(block[0]);
4039     last_non_zero= last_i - 1;
4040     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4041
4042     if(last_non_zero < start_i)
4043         return last_non_zero;
4044
         /* Only coefficient 0 of a non-intra block survived: re-decide its
          * level directly against the saved value, including level 0. */
4045     if(last_non_zero == 0 && start_i == 0){
4046         int best_level= 0;
4047         int best_score= dc * dc;
4048
4049         for(i=0; i<coeff_count[0]; i++){
4050             int level= coeff[i][0];
4051             int alevel= FFABS(level);
4052             int unquant_coeff, score, distortion;
4053
4054             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4055                     unquant_coeff= (alevel*qmul + qadd)>>3;
4056             }else{ //MPEG1
4057                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
4058                     unquant_coeff =   (unquant_coeff - 1) | 1;
4059             }
4060             unquant_coeff = (unquant_coeff + 4) >> 3;
4061             unquant_coeff<<= 3 + 3;
4062
4063             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4064             level+=64;
4065             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4066             else                    score= distortion + esc_length*lambda;
4067
4068             if(score < best_score){
4069                 best_score= score;
4070                 best_level= level - 64;
4071             }
4072         }
4073         block[0]= best_level;
4074         s->coded_score[n] = best_score - dc*dc;
4075         if(best_level == 0) return -1;
4076         else                return last_non_zero;
4077     }
4078
         /* Write the final coefficient, then walk the chosen path backwards
          * through run_tab[]/level_tab[] storing each level at its permuted
          * position. */
4079     i= last_i;
4080     av_assert2(last_level);
4081
4082     block[ perm_scantable[last_non_zero] ]= last_level;
4083     i -= last_run + 1;
4084
4085     for(; i>start_i; i -= run_tab[i] + 1){
4086         block[ perm_scantable[i-1] ]= level_tab[i];
4087     }
4088
4089     return last_non_zero;
4090 }
4091
4092 //#define REFINE_STATS 1
4093 static int16_t basis[64][64];
4094
4095 static void build_basis(uint8_t *perm){
4096     int i, j, x, y;
4097     emms_c();
4098     for(i=0; i<8; i++){
4099         for(j=0; j<8; j++){
4100             for(y=0; y<8; y++){
4101                 for(x=0; x<8; x++){
4102                     double s= 0.25*(1<<BASIS_SHIFT);
4103                     int index= 8*i + j;
4104                     int perm_index= perm[index];
4105                     if(i==0) s*= sqrt(0.5);
4106                     if(j==0) s*= sqrt(0.5);
4107                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4108                 }
4109             }
4110         }
4111     }
4112 }
4113
/**
 * Refine the already quantized coefficients of one block by a greedy search.
 *
 * Each iteration tries changing every candidate coefficient by +1 and -1,
 * scores the change as (VLC length difference) * lambda plus the value
 * returned by s->mpvencdsp.try_8x8basis() (presumably the weighted error of
 * the residual after the change -- confirm against that helper), and applies
 * the single best change. The loop ends when no change beats the score of
 * leaving the block untouched.
 *
 * @param s      encoder context (VLC length tables, lambda2, DC scales,
 *               quantizer_noise_shaping, block_last_index)
 * @param block  quantized levels at s->intra_scantable.permutated positions,
 *               modified in place
 * @param weight 64 per-sample weights; overwritten with values derived from
 *               their magnitudes (16 .. 63 according to the code comment)
 * @param orig   64 original samples used to initialize the residual rem[]
 * @param n      block index; selects luma (n < 4) or chroma DC scale and
 *               VLC length tables, and indexes s->block_last_index
 * @param qscale quantizer; levels are dequantized as 2*qscale*level
 *               +- ((qscale-1)|1)
 * @return scan index of the last non-zero coefficient after refinement
 */
4114 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4115                         int16_t *block, int16_t *weight, int16_t *orig,
4116                         int n, int qscale){
4117     int16_t rem[64];
4118     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4119     const uint8_t *scantable= s->intra_scantable.scantable;
4120     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4121 //    unsigned int threshold1, threshold2;
4122 //    int bias=0;
4123     int run_tab[65];
4124     int prev_run=0;
4125     int prev_level=0;
4126     int qmul, qadd, start_i, last_non_zero, i, dc;
4127     uint8_t * length;
4128     uint8_t * last_length;
4129     int lambda;
4130     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4131 #ifdef REFINE_STATS
4132 static int count=0;
4133 static int after_last=0;
4134 static int to_zero=0;
4135 static int from_zero=0;
4136 static int raise=0;
4137 static int lower=0;
4138 static int messed_sign=0;
4139 #endif
4140
         /* Build the basis table on first use. */
4141     if(basis[0][0] == 0)
4142         build_basis(s->idsp.idct_permutation);
4143
4144     qmul= qscale*2;
4145     qadd= (qscale-1)|1;
4146     if (s->mb_intra) {
4147         if (!s->h263_aic) {
4148             if (n < 4)
4149                 q = s->y_dc_scale;
4150             else
4151                 q = s->c_dc_scale;
4152         } else{
4153             /* For AIC we skip quant/dequant of INTRADC */
4154             q = 1;
4155             qadd=0;
4156         }
4157         q <<= RECON_SHIFT-3;
4158         /* note: block[0] is assumed to be positive */
4159         dc= block[0]*q;
4160 //        block[0] = (block[0] + (q >> 1)) / q;
4161         start_i = 1;
4162 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4163 //            bias= 1<<(QMAT_SHIFT-1);
4164         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4165             length     = s->intra_chroma_ac_vlc_length;
4166             last_length= s->intra_chroma_ac_vlc_last_length;
4167         } else {
4168             length     = s->intra_ac_vlc_length;
4169             last_length= s->intra_ac_vlc_last_length;
4170         }
4171     } else {
4172         dc= 0;
4173         start_i = 0;
4174         length     = s->inter_ac_vlc_length;
4175         last_length= s->inter_ac_vlc_last_length;
4176     }
4177     last_non_zero = s->block_last_index[n];
4178
4179 #ifdef REFINE_STATS
4180 {START_TIMER
4181 #endif
         /* Start the residual as (DC term + rounding offset) - original,
          * in RECON_SHIFT fixed point. */
4182     dc += (1<<(RECON_SHIFT-1));
4183     for(i=0; i<64; i++){
4184         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
4185     }
4186 #ifdef REFINE_STATS
4187 STOP_TIMER("memset rem[]")}
4188 #endif
         /* Remap every weight and accumulate the sum of squared weights,
          * which scales lambda below. */
4189     sum=0;
4190     for(i=0; i<64; i++){
4191         int one= 36;
4192         int qns=4;
4193         int w;
4194
4195         w= FFABS(weight[i]) + qns*one;
4196         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4197
4198         weight[i] = w;
4199 //        w=weight[i] = (63*qns + (w/2)) / w;
4200
4201         av_assert2(w>0);
4202         av_assert2(w<(1<<6));
4203         sum += w*w;
4204     }
4205     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4206 #ifdef REFINE_STATS
4207 {START_TIMER
4208 #endif
         /* Record the zero run preceding each non-zero level in run_tab[]
          * and add every dequantized level's basis contribution to rem[]. */
4209     run=0;
4210     rle_index=0;
4211     for(i=start_i; i<=last_non_zero; i++){
4212         int j= perm_scantable[i];
4213         const int level= block[j];
4214         int coeff;
4215
4216         if(level){
4217             if(level<0) coeff= qmul*level - qadd;
4218             else        coeff= qmul*level + qadd;
4219             run_tab[rle_index++]=run;
4220             run=0;
4221
4222             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4223         }else{
4224             run++;
4225         }
4226     }
4227 #ifdef REFINE_STATS
4228 if(last_non_zero>0){
4229 STOP_TIMER("init rem[]")
4230 }
4231 }
4232
4233 {START_TIMER
4234 #endif
         /* Greedy search: one coefficient is changed by +-1 per iteration. */
4235     for(;;){
             /* Baseline: score of leaving the block as it is. */
4236         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4237         int best_coeff=0;
4238         int best_change=0;
4239         int run2, best_unquant_change=0, analyze_gradient;
4240 #ifdef REFINE_STATS
4241 {START_TIMER
4242 #endif
4243         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4244
             /* d1[] = forward DCT of the weighted residual; its signs are
              * used below to reject +-1 insertions at zero positions. */
4245         if(analyze_gradient){
4246 #ifdef REFINE_STATS
4247 {START_TIMER
4248 #endif
4249             for(i=0; i<64; i++){
4250                 int w= weight[i];
4251
4252                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4253             }
4254 #ifdef REFINE_STATS
4255 STOP_TIMER("rem*w*w")}
4256 {START_TIMER
4257 #endif
4258             s->fdsp.fdct(d1);
4259 #ifdef REFINE_STATS
4260 STOP_TIMER("dct")}
4261 #endif
4262         }
4263
             /* Intra blocks only (start_i == 1): try the DC level +-1, with
              * no bit-cost term in the score. */
4264         if(start_i){
4265             const int level= block[0];
4266             int change, old_coeff;
4267
4268             av_assert2(s->mb_intra);
4269
4270             old_coeff= q*level;
4271
4272             for(change=-1; change<=1; change+=2){
4273                 int new_level= level + change;
4274                 int score, new_coeff;
4275
4276                 new_coeff= q*new_level;
4277                 if(new_coeff >= 2048 || new_coeff < 0)
4278                     continue;
4279
4280                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4281                                                   new_coeff - old_coeff);
4282                 if(score<best_score){
4283                     best_score= score;
4284                     best_coeff= 0;
4285                     best_change= change;
4286                     best_unquant_change= new_coeff - old_coeff;
4287                 }
4288             }
4289         }
4290
4291         run=0;
4292         rle_index=0;
4293         run2= run_tab[rle_index++];
4294         prev_level=0;
4295         prev_run=0;
4296
             /* Try +-1 on each remaining coefficient; below noise-shaping
              * level 3 the scan stops one position past last_non_zero. */
4297         for(i=start_i; i<64; i++){
4298             int j= perm_scantable[i];
4299             const int level= block[j];
4300             int change, old_coeff;
4301
4302             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4303                 break;
4304
4305             if(level){
4306                 if(level<0) old_coeff= qmul*level - qadd;
4307                 else        old_coeff= qmul*level + qadd;
4308                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4309             }else{
4310                 old_coeff=0;
4311                 run2--;
4312                 av_assert2(run2>=0 || i >= last_non_zero );
4313             }
4314
4315             for(change=-1; change<=1; change+=2){
4316                 int new_level= level + change;
4317                 int score, new_coeff, unquant_change;
4318
4319                 score=0;
                     /* Below noise-shaping level 2, never increase a magnitude. */
4320                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4321                    continue;
4322
4323                 if(new_level){
4324                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4325                     else            new_coeff= qmul*new_level + qadd;
4326                     if(new_coeff >= 2048 || new_coeff <= -2048)
4327                         continue;
4328                     //FIXME check for overflow
4329
4330                     if(level){
                             /* Non-zero stays non-zero: only this code's length changes. */
4331                         if(level < 63 && level > -63){
4332                             if(i < last_non_zero)
4333                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4334                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4335                             else
4336                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4337                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4338                         }
4339                     }else{
                             /* Zero becomes +-1: a new code is inserted and the
                              * run of the following coefficient is split. */
4340                         av_assert2(FFABS(new_level)==1);
4341
4342                         if(analyze_gradient){
4343                             int g= d1[ scantable[i] ];
4344                             if(g && (g^new_level) >= 0)
4345                                 continue;
4346                         }
4347
4348                         if(i < last_non_zero){
4349                             int next_i= i + run2 + 1;
4350                             int next_level= block[ perm_scantable[next_i] ] + 64;
4351
4352                             if(next_level&(~127))
4353                                 next_level= 0;
4354
4355                             if(next_i < last_non_zero)
4356                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4357                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4358                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4359                             else
4360                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4361                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4362                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4363                         }else{
4364                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4365                             if(prev_level){
4366                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4367                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4368                             }
4369                         }
4370                     }
4371                 }else{
                         /* +-1 becomes zero: its code is removed and its run is
                          * merged into the following coefficient's run. */
4372                     new_coeff=0;
4373                     av_assert2(FFABS(level)==1);
4374
4375                     if(i < last_non_zero){
4376                         int next_i= i + run2 + 1;
4377                         int next_level= block[ perm_scantable[next_i] ] + 64;
4378
4379                         if(next_level&(~127))
4380                             next_level= 0;
4381
4382                         if(next_i < last_non_zero)
4383                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4384                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4385                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4386                         else
4387                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4388                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4389                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4390                     }else{
4391                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4392                         if(prev_level){
4393                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4394                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4395                         }
4396                     }
4397                 }
4398
                     /* Weight the length difference by lambda, then add the
                      * value returned by try_8x8basis() for this change. */
4399                 score *= lambda;
4400
4401                 unquant_change= new_coeff - old_coeff;
4402                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4403
4404                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4405                                                    unquant_change);
4406                 if(score<best_score){
4407                     best_score= score;
4408                     best_coeff= i;
4409                     best_change= change;
4410                     best_unquant_change= unquant_change;
4411                 }
4412             }
4413             if(level){
4414                 prev_level= level + 64;
4415                 if(prev_level&(~127))
4416                     prev_level= 0;
4417                 prev_run= run;
4418                 run=0;
4419             }else{
4420                 run++;
4421             }
4422         }
4423 #ifdef REFINE_STATS
4424 STOP_TIMER("iterative step")}
4425 #endif
4426
             /* Apply the best change found, then update last_non_zero,
              * run_tab[] and rem[]; stop when nothing improved. */
4427         if(best_change){
4428             int j= perm_scantable[ best_coeff ];
4429
4430             block[j] += best_change;
4431
4432             if(best_coeff > last_non_zero){
4433                 last_non_zero= best_coeff;
4434                 av_assert2(block[j]);
4435 #ifdef REFINE_STATS
4436 after_last++;
4437 #endif
4438             }else{
4439 #ifdef REFINE_STATS
4440 if(block[j]){
4441     if(block[j] - best_change){
4442         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4443             raise++;
4444         }else{
4445             lower++;
4446         }
4447     }else{
4448         from_zero++;
4449     }
4450 }else{
4451     to_zero++;
4452 }
4453 #endif
                     /* The last coefficient may have become zero: move
                      * last_non_zero back to the last non-zero position. */
4454                 for(; last_non_zero>=start_i; last_non_zero--){
4455                     if(block[perm_scantable[last_non_zero]])
4456                         break;
4457                 }
4458             }
4459 #ifdef REFINE_STATS
4460 count++;
4461 if(256*256*256*64 % count == 0){
4462     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4463 }
4464 #endif
4465             run=0;
4466             rle_index=0;
4467             for(i=start_i; i<=last_non_zero; i++){
4468                 int j= perm_scantable[i];
4469                 const int level= block[j];
4470
4471                  if(level){
4472                      run_tab[rle_index++]=run;
4473                      run=0;
4474                  }else{
4475                      run++;
4476                  }
4477             }
4478
4479             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4480         }else{
4481             break;
4482         }
4483     }
4484 #ifdef REFINE_STATS
4485 if(last_non_zero>0){
4486 STOP_TIMER("iterative search")
4487 }
4488 }
4489 #endif
4490
4491     return last_non_zero;
4492 }
4493
/**
 * Permute the coefficients of an 8x8 block in place.
 *
 * Only the positions scantable[0] .. scantable[last] are touched: each of
 * them is cleared and its value is moved to permutation[position].
 * Nothing is done when last <= 0.
 *
 * @param block       the block to permute
 * @param permutation the permutation vector (source index -> destination index)
 * @param scantable   the scantable in use; it only limits the work to the
 *                    first last+1 scan positions, the block is not
 *                    (inverse) permuted into scantable order
 * @param last        index, in scantable order, of the last non-zero
 *                    coefficient
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int pos;

    if (last <= 0)
        return;
    /* FIXME: an extra early return when permutation[1] == 1 would be ok,
     * but it is not clean and might fail for some permutations. */

    /* Pass 1: stash every value that will move and clear its old slot. */
    for (pos = 0; pos <= last; pos++) {
        const int src = scantable[pos];

        saved[src] = block[src];
        block[src] = 0;
    }

    /* Pass 2: drop each stashed value into its permuted slot. */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos];

        block[permutation[src]] = saved[src];
        pos++;
    }
}
4529
4530 int ff_dct_quantize_c(MpegEncContext *s,
4531                         int16_t *block, int n,
4532                         int qscale, int *overflow)
4533 {
4534     int i, j, level, last_non_zero, q, start_i;
4535     const int *qmat;
4536     const uint8_t *scantable= s->intra_scantable.scantable;
4537     int bias;
4538     int max=0;
4539     unsigned int threshold1, threshold2;
4540
4541     s->fdsp.fdct(block);
4542
4543     if(s->dct_error_sum)
4544         s->denoise_dct(s, block);
4545
4546     if (s->mb_intra) {
4547         if (!s->h263_aic) {
4548             if (n < 4)
4549                 q = s->y_dc_scale;
4550             else
4551                 q = s->c_dc_scale;
4552             q = q << 3;
4553         } else
4554             /* For AIC we skip quant/dequant of INTRADC */
4555             q = 1 << 3;
4556
4557         /* note: block[0] is assumed to be positive */
4558         block[0] = (block[0] + (q >> 1)) / q;
4559         start_i = 1;
4560         last_non_zero = 0;
4561         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4562         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4563     } else {
4564         start_i = 0;
4565         last_non_zero = -1;
4566         qmat = s->q_inter_matrix[qscale];
4567         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4568     }
4569     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4570     threshold2= (threshold1<<1);
4571     for(i=63;i>=start_i;i--) {
4572         j = scantable[i];
4573         level = block[j] * qmat[j];
4574
4575         if(((unsigned)(level+threshold1))>threshold2){
4576             last_non_zero = i;
4577             break;
4578         }else{
4579             block[j]=0;
4580         }
4581     }
4582     for(i=start_i; i<=last_non_zero; i++) {
4583         j = scantable[i];
4584         level = block[j] * qmat[j];
4585
4586 //        if(   bias+level >= (1<<QMAT_SHIFT)
4587 //           || bias-level >= (1<<QMAT_SHIFT)){
4588         if(((unsigned)(level+threshold1))>threshold2){
4589             if(level>0){
4590                 level= (bias + level)>>QMAT_SHIFT;
4591                 block[j]= level;
4592             }else{
4593                 level= (bias - level)>>QMAT_SHIFT;
4594                 block[j]= -level;
4595             }
4596             max |=level;
4597         }else{
4598             block[j]=0;
4599         }
4600     }
4601     *overflow= s->max_qcoeff < max; //overflow might have happened
4602
4603     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4604     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4605         block_permute(block, s->idsp.idct_permutation,
4606                       scantable, last_non_zero);
4607
4608     return last_non_zero;
4609 }
4610
4611 #define OFFSET(x) offsetof(MpegEncContext, x)
4612 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4613 static const AVOption h263_options[] = {
4614     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4615     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4616     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4617     FF_MPV_COMMON_OPTS
4618     { NULL },
4619 };
4620
4621 static const AVClass h263_class = {
4622     .class_name = "H.263 encoder",
4623     .item_name  = av_default_item_name,
4624     .option     = h263_options,
4625     .version    = LIBAVUTIL_VERSION_INT,
4626 };
4627
4628 AVCodec ff_h263_encoder = {
4629     .name           = "h263",
4630     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4631     .type           = AVMEDIA_TYPE_VIDEO,
4632     .id             = AV_CODEC_ID_H263,
4633     .priv_data_size = sizeof(MpegEncContext),
4634     .init           = ff_mpv_encode_init,
4635     .encode2        = ff_mpv_encode_picture,
4636     .close          = ff_mpv_encode_end,
4637     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4638     .priv_class     = &h263_class,
4639 };
4640
4641 static const AVOption h263p_options[] = {
4642     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4643     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4644     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4645     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4646     FF_MPV_COMMON_OPTS
4647     { NULL },
4648 };
4649 static const AVClass h263p_class = {
4650     .class_name = "H.263p encoder",
4651     .item_name  = av_default_item_name,
4652     .option     = h263p_options,
4653     .version    = LIBAVUTIL_VERSION_INT,
4654 };
4655
4656 AVCodec ff_h263p_encoder = {
4657     .name           = "h263p",
4658     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4659     .type           = AVMEDIA_TYPE_VIDEO,
4660     .id             = AV_CODEC_ID_H263P,
4661     .priv_data_size = sizeof(MpegEncContext),
4662     .init           = ff_mpv_encode_init,
4663     .encode2        = ff_mpv_encode_picture,
4664     .close          = ff_mpv_encode_end,
4665     .capabilities   = CODEC_CAP_SLICE_THREADS,
4666     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4667     .priv_class     = &h263p_class,
4668 };
4669
4670 static const AVClass msmpeg4v2_class = {
4671     .class_name = "msmpeg4v2 encoder",
4672     .item_name  = av_default_item_name,
4673     .option     = ff_mpv_generic_options,
4674     .version    = LIBAVUTIL_VERSION_INT,
4675 };
4676
4677 AVCodec ff_msmpeg4v2_encoder = {
4678     .name           = "msmpeg4v2",
4679     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4680     .type           = AVMEDIA_TYPE_VIDEO,
4681     .id             = AV_CODEC_ID_MSMPEG4V2,
4682     .priv_data_size = sizeof(MpegEncContext),
4683     .init           = ff_mpv_encode_init,
4684     .encode2        = ff_mpv_encode_picture,
4685     .close          = ff_mpv_encode_end,
4686     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4687     .priv_class     = &msmpeg4v2_class,
4688 };
4689
4690 static const AVClass msmpeg4v3_class = {
4691     .class_name = "msmpeg4v3 encoder",
4692     .item_name  = av_default_item_name,
4693     .option     = ff_mpv_generic_options,
4694     .version    = LIBAVUTIL_VERSION_INT,
4695 };
4696
4697 AVCodec ff_msmpeg4v3_encoder = {
4698     .name           = "msmpeg4",
4699     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4700     .type           = AVMEDIA_TYPE_VIDEO,
4701     .id             = AV_CODEC_ID_MSMPEG4V3,
4702     .priv_data_size = sizeof(MpegEncContext),
4703     .init           = ff_mpv_encode_init,
4704     .encode2        = ff_mpv_encode_picture,
4705     .close          = ff_mpv_encode_end,
4706     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4707     .priv_class     = &msmpeg4v3_class,
4708 };
4709
4710 static const AVClass wmv1_class = {
4711     .class_name = "wmv1 encoder",
4712     .item_name  = av_default_item_name,
4713     .option     = ff_mpv_generic_options,
4714     .version    = LIBAVUTIL_VERSION_INT,
4715 };
4716
4717 AVCodec ff_wmv1_encoder = {
4718     .name           = "wmv1",
4719     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4720     .type           = AVMEDIA_TYPE_VIDEO,
4721     .id             = AV_CODEC_ID_WMV1,
4722     .priv_data_size = sizeof(MpegEncContext),
4723     .init           = ff_mpv_encode_init,
4724     .encode2        = ff_mpv_encode_picture,
4725     .close          = ff_mpv_encode_end,
4726     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4727     .priv_class     = &wmv1_class,
4728 };