libavcodec/mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /*
26  * non linear quantizers with large QPs and VBV with restrictive qmin fixes sponsored by NOA GmbH
27  */
28
29 /**
30  * @file
31  * The simplest mpeg encoder (well, it was the simplest!).
32  */
33
34 #include <stdint.h>
35
36 #include "libavutil/internal.h"
37 #include "libavutil/intmath.h"
38 #include "libavutil/mathematics.h"
39 #include "libavutil/mem_internal.h"
40 #include "libavutil/pixdesc.h"
41 #include "libavutil/opt.h"
42 #include "libavutil/thread.h"
43 #include "avcodec.h"
44 #include "dct.h"
45 #include "idctdsp.h"
46 #include "mpeg12.h"
47 #include "mpegvideo.h"
48 #include "mpegvideodata.h"
49 #include "h261.h"
50 #include "h263.h"
51 #include "h263data.h"
52 #include "mjpegenc_common.h"
53 #include "mathops.h"
54 #include "mpegutils.h"
55 #include "mjpegenc.h"
56 #include "speedhqenc.h"
57 #include "msmpeg4.h"
58 #include "pixblockdsp.h"
59 #include "qpeldsp.h"
60 #include "faandct.h"
61 #include "thread.h"
62 #include "aandcttab.h"
63 #include "flv.h"
64 #include "mpeg4video.h"
65 #include "internal.h"
66 #include "bytestream.h"
67 #include "wmv2.h"
68 #include "rv10.h"
69 #include "packet_internal.h"
70 #include <limits.h>
71 #include "sp5x.h"
72
73 #define QUANT_BIAS_SHIFT 8
74
75 #define QMAT_SHIFT_MMX 16
76 #define QMAT_SHIFT 21
77
78 static int encode_picture(MpegEncContext *s, int picture_number);
79 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
80 static int sse_mb(MpegEncContext *s);
81 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
82 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
83
84 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_DMV * 2 + 1];
85 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
86
87 const AVOption ff_mpv_generic_options[] = {
88     FF_MPV_COMMON_OPTS
89 #if FF_API_MPEGVIDEO_OPTS
90     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
91     FF_MPV_DEPRECATED_A53_CC_OPT
92     FF_MPV_DEPRECATED_MATRIX_OPT
93     FF_MPV_DEPRECATED_BFRAME_OPTS
94 #endif
95     { NULL },
96 };
97
98 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
99                        uint16_t (*qmat16)[2][64],
100                        const uint16_t *quant_matrix,
101                        int bias, int qmin, int qmax, int intra)
102 {
103     FDCTDSPContext *fdsp = &s->fdsp;
104     int qscale;
105     int shift = 0;
106
107     for (qscale = qmin; qscale <= qmax; qscale++) {
108         int i;
109         int qscale2;
110
111         if (s->q_scale_type) qscale2 = ff_mpeg2_non_linear_qscale[qscale];
112         else                 qscale2 = qscale << 1;
113
114         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
115 #if CONFIG_FAANDCT
116             fdsp->fdct == ff_faandct            ||
117 #endif /* CONFIG_FAANDCT */
118             fdsp->fdct == ff_jpeg_fdct_islow_10) {
119             for (i = 0; i < 64; i++) {
120                 const int j = s->idsp.idct_permutation[i];
121                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
122                 /* 16 <= qscale * quant_matrix[i] <= 7905
123                  * Here den = qscale2 * quant_matrix[j]; the AAN scale
124                  * factors only matter for the ff_fdct_ifast case below,
125                  * so the reciprocal (2 << QMAT_SHIFT) / den is small and
126                  * comfortably fits in an int. */
127
128                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
129             }
130         } else if (fdsp->fdct == ff_fdct_ifast) {
131             for (i = 0; i < 64; i++) {
132                 const int j = s->idsp.idct_permutation[i];
133                 int64_t den = ff_aanscales[i] * (int64_t) qscale2 * quant_matrix[j];
134                 /* 16 <= qscale * quant_matrix[i] <= 7905
135                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
136                  *             19952 <=              x  <= 249205026
137                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
138                  *           3444240 >= (1 << 36) / (x) >= 275 */
139
140                 qmat[qscale][i] = (int)((UINT64_C(2) << (QMAT_SHIFT + 14)) / den);
141             }
142         } else {
143             for (i = 0; i < 64; i++) {
144                 const int j = s->idsp.idct_permutation[i];
145                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
146                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255,
147                  * so den = qscale2 * quant_matrix[j] stays below 1 << 16
148                  * and both (2 << QMAT_SHIFT) / den and the 16-bit
149                  * (2 << QMAT_SHIFT_MMX) / den reciprocals fit their
150                  * destination types (the latter is clamped below anyway). */
151                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
152                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
153                 //                    (qscale * quant_matrix[i]);
154                 qmat16[qscale][0][i] = (2 << QMAT_SHIFT_MMX) / den;
155
156                 if (qmat16[qscale][0][i] == 0 ||
157                     qmat16[qscale][0][i] == 128 * 256)
158                     qmat16[qscale][0][i] = 128 * 256 - 1;
159                 qmat16[qscale][1][i] =
160                     ROUNDED_DIV(bias * (1<<(16 - QUANT_BIAS_SHIFT)),
161                                 qmat16[qscale][0][i]);
162             }
163         }
164
165         for (i = intra; i < 64; i++) {
166             int64_t max = 8191;
167             if (fdsp->fdct == ff_fdct_ifast) {
168                 max = (8191LL * ff_aanscales[i]) >> 14;
169             }
170             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
171                 shift++;
172             }
173         }
174     }
175     if (shift) {
176         av_log(s->avctx, AV_LOG_INFO,
177                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
178                QMAT_SHIFT - shift);
179     }
180 }
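/* A rough worked illustration of the tables built above (assuming the linear
 * scale, i.e. qscale2 = 2 * qscale, and QMAT_SHIFT = 21): the qmat entries are
 * fixed-point reciprocals, so the quantizers later in this file can replace a
 * per-coefficient division by a multiply-and-shift.
 *
 *   den  = qscale2 * quant_matrix[j]           e.g. 16 * 16        = 256
 *   qmat = (2 << QMAT_SHIFT) / den             e.g. 4194304 / 256  = 16384
 *   (coeff * qmat) >> QMAT_SHIFT               e.g. (1000 * 16384) >> 21 = 7
 *
 * which matches coeff / (qscale * quant_matrix[j]) = 1000 / (8 * 16) ~= 7. */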
181
182 static inline void update_qscale(MpegEncContext *s)
183 {
184     if (s->q_scale_type == 1 && 0) {
185         int i;
186         int bestdiff=INT_MAX;
187         int best = 1;
188
189         for (i = 0 ; i<FF_ARRAY_ELEMS(ff_mpeg2_non_linear_qscale); i++) {
190             int diff = FFABS((ff_mpeg2_non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 6)) - (int)s->lambda * 139);
191             if (ff_mpeg2_non_linear_qscale[i] < s->avctx->qmin ||
192                 (ff_mpeg2_non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
193                 continue;
194             if (diff < bestdiff) {
195                 bestdiff = diff;
196                 best = i;
197             }
198         }
199         s->qscale = best;
200     } else {
201         s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
202                     (FF_LAMBDA_SHIFT + 7);
203         s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
204     }
205
206     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
207                  FF_LAMBDA_SHIFT;
208 }
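/* Rough numeric sanity check for the linear branch above (assuming
 * FF_LAMBDA_SHIFT == 7, FF_LAMBDA_SCALE == 128 and FF_QP2LAMBDA == 118):
 *
 *   qscale = (lambda * 139 + 128 * 64) >> 14
 *
 * and 2^14 / 139 ~= 117.9 ~= FF_QP2LAMBDA, so this is approximately the
 * inverse of the usual qscale -> lambda mapping; e.g. lambda = 10 * 118
 * gives (1180 * 139 + 8192) >> 14 = 10. */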
209
210 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
211 {
212     int i;
213
214     if (matrix) {
215         put_bits(pb, 1, 1);
216         for (i = 0; i < 64; i++) {
217             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
218         }
219     } else
220         put_bits(pb, 1, 0);
221 }
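/* In other words: a 1-bit "matrix present" flag followed, when set, by 64
 * eight-bit entries in zigzag scan order, as in the MPEG-1/2
 * load_*_quantiser_matrix syntax. */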
222
223 /**
224  * init s->current_picture.qscale_table from s->lambda_table
225  */
226 void ff_init_qscale_tab(MpegEncContext *s)
227 {
228     int8_t * const qscale_table = s->current_picture.qscale_table;
229     int i;
230
231     for (i = 0; i < s->mb_num; i++) {
232         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
233         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
234         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
235                                                   s->avctx->qmax);
236     }
237 }
238
239 static void update_duplicate_context_after_me(MpegEncContext *dst,
240                                               MpegEncContext *src)
241 {
242 #define COPY(a) dst->a= src->a
243     COPY(pict_type);
244     COPY(current_picture);
245     COPY(f_code);
246     COPY(b_code);
247     COPY(qscale);
248     COPY(lambda);
249     COPY(lambda2);
250     COPY(picture_in_gop_number);
251     COPY(gop_picture_number);
252     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
253     COPY(progressive_frame);    // FIXME don't set in encode_header
254     COPY(partitioned_frame);    // FIXME don't set in encode_header
255 #undef COPY
256 }
257
258 static void mpv_encode_init_static(void)
259 {
260     for (int i = -16; i < 16; i++)
261         default_fcode_tab[i + MAX_MV] = 1;
262 }
263
264 /**
265  * Set the given MpegEncContext to defaults for encoding.
266  * The changed fields will not depend upon the prior state of the MpegEncContext.
267  */
268 static void mpv_encode_defaults(MpegEncContext *s)
269 {
270     static AVOnce init_static_once = AV_ONCE_INIT;
271
272     ff_mpv_common_defaults(s);
273
274     ff_thread_once(&init_static_once, mpv_encode_init_static);
275
276     s->me.mv_penalty = default_mv_penalty;
277     s->fcode_tab     = default_fcode_tab;
278
279     s->input_picture_number  = 0;
280     s->picture_in_gop_number = 0;
281 }
282
283 av_cold int ff_dct_encode_init(MpegEncContext *s)
284 {
285     if (ARCH_X86)
286         ff_dct_encode_init_x86(s);
287
288     if (CONFIG_H263_ENCODER)
289         ff_h263dsp_init(&s->h263dsp);
290     if (!s->dct_quantize)
291         s->dct_quantize = ff_dct_quantize_c;
292     if (!s->denoise_dct)
293         s->denoise_dct  = denoise_dct_c;
294     s->fast_dct_quantize = s->dct_quantize;
295     if (s->avctx->trellis)
296         s->dct_quantize  = dct_quantize_trellis_c;
297
298     return 0;
299 }
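/* Note: when trellis quantization is requested, dct_quantize is redirected to
 * the rate-distortion (trellis) search while fast_dct_quantize keeps pointing
 * at the plain quantizer, presumably so code paths that only need a cheap
 * estimate can avoid the slower search. */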
300
301 /* init video encoder */
302 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
303 {
304     MpegEncContext *s = avctx->priv_data;
305     AVCPBProperties *cpb_props;
306     int i, ret;
307
308     mpv_encode_defaults(s);
309
310     switch (avctx->pix_fmt) {
311     case AV_PIX_FMT_YUVJ444P:
312     case AV_PIX_FMT_YUV444P:
313         s->chroma_format = CHROMA_444;
314         break;
315     case AV_PIX_FMT_YUVJ422P:
316     case AV_PIX_FMT_YUV422P:
317         s->chroma_format = CHROMA_422;
318         break;
319     case AV_PIX_FMT_YUVJ420P:
320     case AV_PIX_FMT_YUV420P:
321     default:
322         s->chroma_format = CHROMA_420;
323         break;
324     }
325
326     avctx->bits_per_raw_sample = av_clip(avctx->bits_per_raw_sample, 0, 8);
327
328     s->bit_rate = avctx->bit_rate;
329     s->width    = avctx->width;
330     s->height   = avctx->height;
331     if (avctx->gop_size > 600 &&
332         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
333         av_log(avctx, AV_LOG_WARNING,
334                "keyframe interval too large, reducing it from %d to %d\n",
335                avctx->gop_size, 600);
336         avctx->gop_size = 600;
337     }
338     s->gop_size     = avctx->gop_size;
339     s->avctx        = avctx;
340     if (avctx->max_b_frames > MAX_B_FRAMES) {
341         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
342                "is %d.\n", MAX_B_FRAMES);
343         avctx->max_b_frames = MAX_B_FRAMES;
344     }
345     s->max_b_frames = avctx->max_b_frames;
346     s->codec_id     = avctx->codec->id;
347     s->strict_std_compliance = avctx->strict_std_compliance;
348     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
349     s->rtp_mode           = !!s->rtp_payload_size;
350     s->intra_dc_precision = avctx->intra_dc_precision;
351
352     // workaround some differences between how applications specify dc precision
353     if (s->intra_dc_precision < 0) {
354         s->intra_dc_precision += 8;
355     } else if (s->intra_dc_precision >= 8)
356         s->intra_dc_precision -= 8;
357
358     if (s->intra_dc_precision < 0) {
359         av_log(avctx, AV_LOG_ERROR,
360                 "intra dc precision must be non-negative; note that some applications use"
361                 " 0 and others 8 as the base (meaning 8 bit), and the value must not be smaller than that\n");
362         return AVERROR(EINVAL);
363     }
364
365     if (avctx->codec_id == AV_CODEC_ID_AMV || (avctx->active_thread_type & FF_THREAD_SLICE))
366         s->huffman = 0;
367
368     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
369         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
370         return AVERROR(EINVAL);
371     }
372     s->user_specified_pts = AV_NOPTS_VALUE;
373
374     if (s->gop_size <= 1) {
375         s->intra_only = 1;
376         s->gop_size   = 12;
377     } else {
378         s->intra_only = 0;
379     }
380
381     /* Fixed QSCALE */
382     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
383
384     s->adaptive_quant = (avctx->lumi_masking ||
385                          avctx->dark_masking ||
386                          avctx->temporal_cplx_masking ||
387                          avctx->spatial_cplx_masking  ||
388                          avctx->p_masking      ||
389                          s->border_masking ||
390                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
391                         !s->fixed_qscale;
392
393     s->loop_filter = !!(avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
394
395     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
396         switch(avctx->codec_id) {
397         case AV_CODEC_ID_MPEG1VIDEO:
398         case AV_CODEC_ID_MPEG2VIDEO:
399             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
400             break;
401         case AV_CODEC_ID_MPEG4:
402         case AV_CODEC_ID_MSMPEG4V1:
403         case AV_CODEC_ID_MSMPEG4V2:
404         case AV_CODEC_ID_MSMPEG4V3:
405             if       (avctx->rc_max_rate >= 15000000) {
406                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
407             } else if(avctx->rc_max_rate >=  2000000) {
408                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
409             } else if(avctx->rc_max_rate >=   384000) {
410                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
411             } else
412                 avctx->rc_buffer_size = 40;
413             avctx->rc_buffer_size *= 16384;
414             break;
415         }
416         if (avctx->rc_buffer_size) {
417             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
418         }
419     }
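    /* Worked example of the MPEG-1/2 branch above: for rc_max_rate <= 15 Mb/s,
     * FFMAX(rc_max_rate, 15000000) * 112 / 15000000 * 16384 = 112 * 16384
     * = 1835008 bits, which the log message reports as 1835008 / 8192
     * = 224 kbyte. */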
420
421     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
422         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
423         return AVERROR(EINVAL);
424     }
425
426     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
427         av_log(avctx, AV_LOG_INFO,
428                "Warning: min_rate > 0 with min_rate != max_rate is not recommended!\n");
429     }
430
431     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
432         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
433         return AVERROR(EINVAL);
434     }
435
436     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
437         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
438         return AVERROR(EINVAL);
439     }
440
441     if (avctx->rc_max_rate &&
442         avctx->rc_max_rate == avctx->bit_rate &&
443         avctx->rc_max_rate != avctx->rc_min_rate) {
444         av_log(avctx, AV_LOG_INFO,
445                "impossible bitrate constraints, this will fail\n");
446     }
447
448     if (avctx->rc_buffer_size &&
449         avctx->bit_rate * (int64_t)avctx->time_base.num >
450             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
451         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
452         return AVERROR(EINVAL);
453     }
454
455     if (!s->fixed_qscale &&
456         avctx->bit_rate * av_q2d(avctx->time_base) >
457             avctx->bit_rate_tolerance) {
458         av_log(avctx, AV_LOG_WARNING,
459                "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
460         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
461     }
462
463     if (avctx->rc_max_rate &&
464         avctx->rc_min_rate == avctx->rc_max_rate &&
465         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
466          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
467         90000LL * (avctx->rc_buffer_size - 1) >
468             avctx->rc_max_rate * 0xFFFFLL) {
469         av_log(avctx, AV_LOG_INFO,
470                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
471                "specified vbv buffer is too large for the given bitrate!\n");
472     }
473
474     if ((avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
475         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
476         s->codec_id != AV_CODEC_ID_FLV1) {
477         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
478         return AVERROR(EINVAL);
479     }
480
481     if (s->obmc && avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
482         av_log(avctx, AV_LOG_ERROR,
483                "OBMC is only supported with simple mb decision\n");
484         return AVERROR(EINVAL);
485     }
486
487     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
488         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
489         return AVERROR(EINVAL);
490     }
491
492     if (s->max_b_frames                    &&
493         s->codec_id != AV_CODEC_ID_MPEG4      &&
494         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
495         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
496         av_log(avctx, AV_LOG_ERROR, "B-frames not supported by codec\n");
497         return AVERROR(EINVAL);
498     }
499     if (s->max_b_frames < 0) {
500         av_log(avctx, AV_LOG_ERROR,
501                "max b frames must be 0 or positive for mpegvideo based encoders\n");
502         return AVERROR(EINVAL);
503     }
504
505     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
506          s->codec_id == AV_CODEC_ID_H263  ||
507          s->codec_id == AV_CODEC_ID_H263P) &&
508         (avctx->sample_aspect_ratio.num > 255 ||
509          avctx->sample_aspect_ratio.den > 255)) {
510         av_log(avctx, AV_LOG_WARNING,
511                "Invalid pixel aspect ratio %i/%i, limit is 255/255, reducing\n",
512                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
513         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
514                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
515     }
516
517     if ((s->codec_id == AV_CODEC_ID_H263  ||
518          s->codec_id == AV_CODEC_ID_H263P) &&
519         (avctx->width  > 2048 ||
520          avctx->height > 1152 )) {
521         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
522         return AVERROR(EINVAL);
523     }
524     if ((s->codec_id == AV_CODEC_ID_H263  ||
525          s->codec_id == AV_CODEC_ID_H263P ||
526          s->codec_id == AV_CODEC_ID_RV20) &&
527         ((avctx->width &3) ||
528          (avctx->height&3) )) {
529         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
530         return AVERROR(EINVAL);
531     }
532
533     if (s->codec_id == AV_CODEC_ID_RV10 &&
534         (avctx->width &15 ||
535          avctx->height&15 )) {
536         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
537         return AVERROR(EINVAL);
538     }
539
540     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
541          s->codec_id == AV_CODEC_ID_WMV2) &&
542          avctx->width & 1) {
543         av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
544         return AVERROR(EINVAL);
545     }
546
547     if ((avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
548         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
549         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
550         return AVERROR(EINVAL);
551     }
552
553     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
554         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
555         return AVERROR(EINVAL);
556     }
557
558     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
559         avctx->mb_decision != FF_MB_DECISION_RD) {
560         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
561         return AVERROR(EINVAL);
562     }
563
564     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
565             (s->codec_id == AV_CODEC_ID_AMV ||
566              s->codec_id == AV_CODEC_ID_MJPEG)) {
567         // Used to produce garbage with MJPEG.
568         av_log(avctx, AV_LOG_ERROR,
569                "QP RD is no longer compatible with MJPEG or AMV\n");
570         return AVERROR(EINVAL);
571     }
572
573     if (s->scenechange_threshold < 1000000000 &&
574         (avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
575         av_log(avctx, AV_LOG_ERROR,
576                "closed GOP with scene change detection is not supported yet, "
577                "set the threshold to 1000000000\n");
578         return AVERROR_PATCHWELCOME;
579     }
580
581     if (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
582         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
583             s->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
584             av_log(avctx, AV_LOG_ERROR,
585                    "low delay forcing is only available for mpeg2, "
586                    "set strict_std_compliance to 'unofficial' or lower in order to allow it\n");
587             return AVERROR(EINVAL);
588         }
589         if (s->max_b_frames != 0) {
590             av_log(avctx, AV_LOG_ERROR,
591                    "B-frames cannot be used with low delay\n");
592             return AVERROR(EINVAL);
593         }
594     }
595
596     if (s->q_scale_type == 1) {
597         if (avctx->qmax > 28) {
598             av_log(avctx, AV_LOG_ERROR,
599                    "non linear quant only supports qmax <= 28 currently\n");
600             return AVERROR_PATCHWELCOME;
601         }
602     }
603
604     if (avctx->slices > 1 &&
605         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
606         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
607         return AVERROR(EINVAL);
608     }
609
610     if (avctx->thread_count > 1         &&
611         s->codec_id != AV_CODEC_ID_MPEG4      &&
612         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
613         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
614         s->codec_id != AV_CODEC_ID_MJPEG      &&
615         (s->codec_id != AV_CODEC_ID_H263P)) {
616         av_log(avctx, AV_LOG_ERROR,
617                "multi threaded encoding not supported by codec\n");
618         return AVERROR_PATCHWELCOME;
619     }
620
621     if (avctx->thread_count < 1) {
622         av_log(avctx, AV_LOG_ERROR,
623                "automatic thread number detection not supported by codec, "
624                "patch welcome\n");
625         return AVERROR_PATCHWELCOME;
626     }
627
628     if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
629         av_log(avctx, AV_LOG_INFO,
630                "notice: b_frame_strategy only affects the first pass\n");
631         s->b_frame_strategy = 0;
632     }
633
634     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
635     if (i > 1) {
636         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
637         avctx->time_base.den /= i;
638         avctx->time_base.num /= i;
639         //return -1;
640     }
641
642     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id == AV_CODEC_ID_AMV || s->codec_id == AV_CODEC_ID_SPEEDHQ) {
643         // (a + x * 3 / 8) / x
644         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
645         s->inter_quant_bias = 0;
646     } else {
647         s->intra_quant_bias = 0;
648         // (a - x / 4) / x
649         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
650     }
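    /* With QUANT_BIAS_SHIFT == 8, 3 << (QUANT_BIAS_SHIFT - 3) is 3/8 in 8-bit
     * fixed point (the "(a + x * 3 / 8) / x" rounding above) and
     * -(1 << (QUANT_BIAS_SHIFT - 2)) is -1/4, biasing inter coefficients
     * towards zero. */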
651
652     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
653         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
654         return AVERROR(EINVAL);
655     }
656
657     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
658
659     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
660         avctx->time_base.den > (1 << 16) - 1) {
661         av_log(avctx, AV_LOG_ERROR,
662                "timebase %d/%d not supported by MPEG 4 standard, "
663                "the maximum admitted value for the timebase denominator "
664                "is %d\n", avctx->time_base.num, avctx->time_base.den,
665                (1 << 16) - 1);
666         return AVERROR(EINVAL);
667     }
668     s->time_increment_bits = av_log2(avctx->time_base.den - 1) + 1;
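    /* i.e. the number of bits needed to represent values up to
     * time_base.den - 1; e.g. den = 30 gives av_log2(29) + 1 = 5 bits. */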
669
670     switch (avctx->codec->id) {
671     case AV_CODEC_ID_MPEG1VIDEO:
672         s->out_format = FMT_MPEG1;
673         s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
674         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
675         break;
676     case AV_CODEC_ID_MPEG2VIDEO:
677         s->out_format = FMT_MPEG1;
678         s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
679         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
680         s->rtp_mode   = 1;
681         break;
682 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
683     case AV_CODEC_ID_MJPEG:
684     case AV_CODEC_ID_AMV:
685         s->out_format = FMT_MJPEG;
686         s->intra_only = 1; /* force intra only for jpeg */
687         if ((ret = ff_mjpeg_encode_init(s)) < 0)
688             return ret;
689         avctx->delay = 0;
690         s->low_delay = 1;
691         break;
692 #endif
693     case AV_CODEC_ID_SPEEDHQ:
694         s->out_format = FMT_SPEEDHQ;
695         s->intra_only = 1; /* force intra only for SHQ */
696         if (!CONFIG_SPEEDHQ_ENCODER)
697             return AVERROR_ENCODER_NOT_FOUND;
698         if ((ret = ff_speedhq_encode_init(s)) < 0)
699             return ret;
700         avctx->delay = 0;
701         s->low_delay = 1;
702         break;
703     case AV_CODEC_ID_H261:
704         if (!CONFIG_H261_ENCODER)
705             return AVERROR_ENCODER_NOT_FOUND;
706         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
707             av_log(avctx, AV_LOG_ERROR,
708                    "The specified picture size of %dx%d is not valid for the "
709                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
710                     s->width, s->height);
711             return AVERROR(EINVAL);
712         }
713         s->out_format = FMT_H261;
714         avctx->delay  = 0;
715         s->low_delay  = 1;
716         s->rtp_mode   = 0; /* Sliced encoding not supported */
717         break;
718     case AV_CODEC_ID_H263:
719         if (!CONFIG_H263_ENCODER)
720             return AVERROR_ENCODER_NOT_FOUND;
721         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
722                              s->width, s->height) == 8) {
723             av_log(avctx, AV_LOG_ERROR,
724                    "The specified picture size of %dx%d is not valid for "
725                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
726                    "352x288, 704x576, and 1408x1152. "
727                    "Try H.263+.\n", s->width, s->height);
728             return AVERROR(EINVAL);
729         }
730         s->out_format = FMT_H263;
731         avctx->delay  = 0;
732         s->low_delay  = 1;
733         break;
734     case AV_CODEC_ID_H263P:
735         s->out_format = FMT_H263;
736         s->h263_plus  = 1;
737         /* Fx */
738         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
739         s->modified_quant  = s->h263_aic;
740         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
741         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
742
743         /* /Fx */
744         /* These are just to be sure */
745         avctx->delay = 0;
746         s->low_delay = 1;
747         break;
748     case AV_CODEC_ID_FLV1:
749         s->out_format      = FMT_H263;
750         s->h263_flv        = 2; /* format = 1; 11-bit codes */
751         s->unrestricted_mv = 1;
752         s->rtp_mode  = 0; /* don't allow GOB */
753         avctx->delay = 0;
754         s->low_delay = 1;
755         break;
756     case AV_CODEC_ID_RV10:
757         s->out_format = FMT_H263;
758         avctx->delay  = 0;
759         s->low_delay  = 1;
760         break;
761     case AV_CODEC_ID_RV20:
762         s->out_format      = FMT_H263;
763         avctx->delay       = 0;
764         s->low_delay       = 1;
765         s->modified_quant  = 1;
766         s->h263_aic        = 1;
767         s->h263_plus       = 1;
768         s->loop_filter     = 1;
769         s->unrestricted_mv = 0;
770         break;
771     case AV_CODEC_ID_MPEG4:
772         s->out_format      = FMT_H263;
773         s->h263_pred       = 1;
774         s->unrestricted_mv = 1;
775         s->low_delay       = s->max_b_frames ? 0 : 1;
776         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
777         break;
778     case AV_CODEC_ID_MSMPEG4V2:
779         s->out_format      = FMT_H263;
780         s->h263_pred       = 1;
781         s->unrestricted_mv = 1;
782         s->msmpeg4_version = 2;
783         avctx->delay       = 0;
784         s->low_delay       = 1;
785         break;
786     case AV_CODEC_ID_MSMPEG4V3:
787         s->out_format        = FMT_H263;
788         s->h263_pred         = 1;
789         s->unrestricted_mv   = 1;
790         s->msmpeg4_version   = 3;
791         s->flipflop_rounding = 1;
792         avctx->delay         = 0;
793         s->low_delay         = 1;
794         break;
795     case AV_CODEC_ID_WMV1:
796         s->out_format        = FMT_H263;
797         s->h263_pred         = 1;
798         s->unrestricted_mv   = 1;
799         s->msmpeg4_version   = 4;
800         s->flipflop_rounding = 1;
801         avctx->delay         = 0;
802         s->low_delay         = 1;
803         break;
804     case AV_CODEC_ID_WMV2:
805         s->out_format        = FMT_H263;
806         s->h263_pred         = 1;
807         s->unrestricted_mv   = 1;
808         s->msmpeg4_version   = 5;
809         s->flipflop_rounding = 1;
810         avctx->delay         = 0;
811         s->low_delay         = 1;
812         break;
813     default:
814         return AVERROR(EINVAL);
815     }
816
817     avctx->has_b_frames = !s->low_delay;
818
819     s->encoding = 1;
820
821     s->progressive_frame    =
822     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
823                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
824                                 s->alternate_scan);
825
826     /* init */
827     ff_mpv_idct_init(s);
828     if ((ret = ff_mpv_common_init(s)) < 0)
829         return ret;
830
831     ff_fdctdsp_init(&s->fdsp, avctx);
832     ff_me_cmp_init(&s->mecc, avctx);
833     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
834     ff_pixblockdsp_init(&s->pdsp, avctx);
835     ff_qpeldsp_init(&s->qdsp);
836
837     if (s->msmpeg4_version) {
838         int ac_stats_size = 2 * 2 * (MAX_LEVEL + 1) *  (MAX_RUN + 1) * 2 * sizeof(int);
839         if (!(s->ac_stats = av_mallocz(ac_stats_size)))
840             return AVERROR(ENOMEM);
841     }
842
843     if (!(avctx->stats_out = av_mallocz(256))               ||
844         !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix,          32) ||
845         !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix,   32) ||
846         !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix,          32) ||
847         !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix16,        32) ||
848         !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix16, 32) ||
849         !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix16,        32) ||
850         !FF_ALLOCZ_TYPED_ARRAY(s->input_picture,           MAX_PICTURE_COUNT) ||
851         !FF_ALLOCZ_TYPED_ARRAY(s->reordered_input_picture, MAX_PICTURE_COUNT))
852         return AVERROR(ENOMEM);
853
854     if (s->noise_reduction) {
855         if (!FF_ALLOCZ_TYPED_ARRAY(s->dct_offset, 2))
856             return AVERROR(ENOMEM);
857     }
858
859     ff_dct_encode_init(s);
860
861     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
862         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
863
864     if (s->slice_context_count > 1) {
865         s->rtp_mode = 1;
866
867         if (avctx->codec_id == AV_CODEC_ID_H263P)
868             s->h263_slice_structured = 1;
869     }
870
871     s->quant_precision = 5;
872
873     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      avctx->ildct_cmp);
874     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
875
876     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
877         ff_h261_encode_init(s);
878     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
879         ff_h263_encode_init(s);
880     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
881         ff_msmpeg4_encode_init(s);
882     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
883         && s->out_format == FMT_MPEG1)
884         ff_mpeg1_encode_init(s);
885
886     /* init q matrix */
887     for (i = 0; i < 64; i++) {
888         int j = s->idsp.idct_permutation[i];
889         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
890             s->mpeg_quant) {
891             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
892             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
893         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
894             s->intra_matrix[j] =
895             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
896         } else if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
897             s->intra_matrix[j] =
898             s->inter_matrix[j] = ff_mpeg1_default_intra_matrix[i];
899         } else {
900             /* MPEG-1/2 */
901             s->chroma_intra_matrix[j] =
902             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
903             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
904         }
905         if (avctx->intra_matrix)
906             s->intra_matrix[j] = avctx->intra_matrix[i];
907         if (avctx->inter_matrix)
908             s->inter_matrix[j] = avctx->inter_matrix[i];
909     }
910
911     /* precompute matrix */
912     /* for mjpeg, we do include qscale in the matrix */
913     if (s->out_format != FMT_MJPEG) {
914         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
915                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
916                           31, 1);
917         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
918                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
919                           31, 0);
920     }
921
922     if ((ret = ff_rate_control_init(s)) < 0)
923         return ret;
924
925     if (s->b_frame_strategy == 2) {
926         for (i = 0; i < s->max_b_frames + 2; i++) {
927             s->tmp_frames[i] = av_frame_alloc();
928             if (!s->tmp_frames[i])
929                 return AVERROR(ENOMEM);
930
931             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
932             s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
933             s->tmp_frames[i]->height = s->height >> s->brd_scale;
934
935             ret = av_frame_get_buffer(s->tmp_frames[i], 0);
936             if (ret < 0)
937                 return ret;
938         }
939     }
940
941     cpb_props = ff_add_cpb_side_data(avctx);
942     if (!cpb_props)
943         return AVERROR(ENOMEM);
944     cpb_props->max_bitrate = avctx->rc_max_rate;
945     cpb_props->min_bitrate = avctx->rc_min_rate;
946     cpb_props->avg_bitrate = avctx->bit_rate;
947     cpb_props->buffer_size = avctx->rc_buffer_size;
948
949     return 0;
950 }
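/* A minimal usage sketch of how an application reaches ff_mpv_encode_init()
 * through the public libavcodec API. The codec choice, dimensions and helper
 * name below are only illustrative, and error handling is omitted. */
#if 0
static int example_encode_one_frame(const AVFrame *frame, AVPacket *pkt)
{
    const AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_MPEG2VIDEO);
    AVCodecContext *c    = avcodec_alloc_context3(codec);
    int ret;

    c->width     = 352;
    c->height    = 288;
    c->time_base = (AVRational){ 1, 25 };
    c->pix_fmt   = AV_PIX_FMT_YUV420P;
    c->bit_rate  = 1000000;

    ret = avcodec_open2(c, codec, NULL);      /* ends up in ff_mpv_encode_init() */
    if (ret >= 0)
        ret = avcodec_send_frame(c, frame);
    if (ret >= 0)
        ret = avcodec_receive_packet(c, pkt); /* may return AVERROR(EAGAIN)
                                               * until enough frames are queued */
    avcodec_free_context(&c);
    return ret;
}
#endif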
951
952 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
953 {
954     MpegEncContext *s = avctx->priv_data;
955     int i;
956
957     ff_rate_control_uninit(s);
958
959     ff_mpv_common_end(s);
960     if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
961         s->out_format == FMT_MJPEG)
962         ff_mjpeg_encode_close(s);
963
964     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
965         av_frame_free(&s->tmp_frames[i]);
966
967     ff_free_picture_tables(&s->new_picture);
968     ff_mpeg_unref_picture(avctx, &s->new_picture);
969
970     av_freep(&avctx->stats_out);
971     av_freep(&s->ac_stats);
972
973     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
974     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
975     s->q_chroma_intra_matrix=   NULL;
976     s->q_chroma_intra_matrix16= NULL;
977     av_freep(&s->q_intra_matrix);
978     av_freep(&s->q_inter_matrix);
979     av_freep(&s->q_intra_matrix16);
980     av_freep(&s->q_inter_matrix16);
981     av_freep(&s->input_picture);
982     av_freep(&s->reordered_input_picture);
983     av_freep(&s->dct_offset);
984
985     return 0;
986 }
987
988 static int get_sae(uint8_t *src, int ref, int stride)
989 {
990     int x,y;
991     int acc = 0;
992
993     for (y = 0; y < 16; y++) {
994         for (x = 0; x < 16; x++) {
995             acc += FFABS(src[x + y * stride] - ref);
996         }
997     }
998
999     return acc;
1000 }
1001
1002 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1003                            uint8_t *ref, int stride)
1004 {
1005     int x, y, w, h;
1006     int acc = 0;
1007
1008     w = s->width  & ~15;
1009     h = s->height & ~15;
1010
1011     for (y = 0; y < h; y += 16) {
1012         for (x = 0; x < w; x += 16) {
1013             int offset = x + y * stride;
1014             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1015                                       stride, 16);
1016             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1017             int sae  = get_sae(src + offset, mean, stride);
1018
1019             acc += sae + 500 < sad;
1020         }
1021     }
1022     return acc;
1023 }
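/* In short: get_sae() measures how much a 16x16 block deviates from its own
 * mean (a cheap proxy for intra coding cost), the SAD against the reference
 * is the inter cost, and a macroblock is counted when sae + 500 < sad, i.e.
 * when intra coding looks clearly cheaper.  The count feeds the b_frame_score
 * heuristic of b_frame_strategy == 1 in select_input_picture() below. */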
1024
1025 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1026 {
1027     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1028                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1029                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1030                             &s->linesize, &s->uvlinesize);
1031 }
1032
1033 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1034 {
1035     Picture *pic = NULL;
1036     int64_t pts;
1037     int i, display_picture_number = 0, ret;
1038     int encoding_delay = s->max_b_frames ? s->max_b_frames
1039                                          : (s->low_delay ? 0 : 1);
1040     int flush_offset = 1;
1041     int direct = 1;
1042
1043     if (pic_arg) {
1044         pts = pic_arg->pts;
1045         display_picture_number = s->input_picture_number++;
1046
1047         if (pts != AV_NOPTS_VALUE) {
1048             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1049                 int64_t last = s->user_specified_pts;
1050
1051                 if (pts <= last) {
1052                     av_log(s->avctx, AV_LOG_ERROR,
1053                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1054                            pts, last);
1055                     return AVERROR(EINVAL);
1056                 }
1057
1058                 if (!s->low_delay && display_picture_number == 1)
1059                     s->dts_delta = pts - last;
1060             }
1061             s->user_specified_pts = pts;
1062         } else {
1063             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1064                 s->user_specified_pts =
1065                 pts = s->user_specified_pts + 1;
1066                 av_log(s->avctx, AV_LOG_INFO,
1067                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1068                        pts);
1069             } else {
1070                 pts = display_picture_number;
1071             }
1072         }
1073
1074         if (!pic_arg->buf[0] ||
1075             pic_arg->linesize[0] != s->linesize ||
1076             pic_arg->linesize[1] != s->uvlinesize ||
1077             pic_arg->linesize[2] != s->uvlinesize)
1078             direct = 0;
1079         if ((s->width & 15) || (s->height & 15))
1080             direct = 0;
1081         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1082             direct = 0;
1083         if (s->linesize & (STRIDE_ALIGN-1))
1084             direct = 0;
1085
1086         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1087                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1088
1089         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1090         if (i < 0)
1091             return i;
1092
1093         pic = &s->picture[i];
1094         pic->reference = 3;
1095
1096         if (direct) {
1097             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1098                 return ret;
1099         }
1100         ret = alloc_picture(s, pic, direct);
1101         if (ret < 0)
1102             return ret;
1103
1104         if (!direct) {
1105             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1106                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1107                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1108                 // empty
1109             } else {
1110                 int h_chroma_shift, v_chroma_shift;
1111                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1112                                                  &h_chroma_shift,
1113                                                  &v_chroma_shift);
1114
1115                 for (i = 0; i < 3; i++) {
1116                     int src_stride = pic_arg->linesize[i];
1117                     int dst_stride = i ? s->uvlinesize : s->linesize;
1118                     int h_shift = i ? h_chroma_shift : 0;
1119                     int v_shift = i ? v_chroma_shift : 0;
1120                     int w = s->width  >> h_shift;
1121                     int h = s->height >> v_shift;
1122                     uint8_t *src = pic_arg->data[i];
1123                     uint8_t *dst = pic->f->data[i];
1124                     int vpad = 16;
1125
1126                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1127                         && !s->progressive_sequence
1128                         && FFALIGN(s->height, 32) - s->height > 16)
1129                         vpad = 32;
1130
1131                     if (!s->avctx->rc_buffer_size)
1132                         dst += INPLACE_OFFSET;
1133
1134                     if (src_stride == dst_stride)
1135                         memcpy(dst, src, src_stride * h);
1136                     else {
1137                         int h2 = h;
1138                         uint8_t *dst2 = dst;
1139                         while (h2--) {
1140                             memcpy(dst2, src, w);
1141                             dst2 += dst_stride;
1142                             src += src_stride;
1143                         }
1144                     }
1145                     if ((s->width & 15) || (s->height & (vpad-1))) {
1146                         s->mpvencdsp.draw_edges(dst, dst_stride,
1147                                                 w, h,
1148                                                 16 >> h_shift,
1149                                                 vpad >> v_shift,
1150                                                 EDGE_BOTTOM);
1151                     }
1152                 }
1153                 emms_c();
1154             }
1155         }
1156         ret = av_frame_copy_props(pic->f, pic_arg);
1157         if (ret < 0)
1158             return ret;
1159
1160         pic->f->display_picture_number = display_picture_number;
1161         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1162     } else {
1163         /* Flushing: When we have not received enough input frames,
1164          * ensure s->input_picture[0] contains the first picture */
1165         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1166             if (s->input_picture[flush_offset])
1167                 break;
1168
1169         if (flush_offset <= 1)
1170             flush_offset = 1;
1171         else
1172             encoding_delay = encoding_delay - flush_offset + 1;
1173     }
1174
1175     /* shift buffer entries */
1176     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1177         s->input_picture[i - flush_offset] = s->input_picture[i];
1178
1179     s->input_picture[encoding_delay] = (Picture*) pic;
1180
1181     return 0;
1182 }
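/* Net effect: new frames are stored at index encoding_delay (max_b_frames, or
 * 0/1 depending on low_delay, adjusted while flushing) while existing entries
 * shift towards index 0, so s->input_picture[] behaves as the reordering FIFO
 * consumed by select_input_picture() below. */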
1183
1184 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1185 {
1186     int x, y, plane;
1187     int score = 0;
1188     int64_t score64 = 0;
1189
1190     for (plane = 0; plane < 3; plane++) {
1191         const int stride = p->f->linesize[plane];
1192         const int bw = plane ? 1 : 2;
1193         for (y = 0; y < s->mb_height * bw; y++) {
1194             for (x = 0; x < s->mb_width * bw; x++) {
1195                 int off = p->shared ? 0 : 16;
1196                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1197                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1198                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1199
1200                 switch (FFABS(s->frame_skip_exp)) {
1201                 case 0: score    =  FFMAX(score, v);          break;
1202                 case 1: score   += FFABS(v);                  break;
1203                 case 2: score64 += v * (int64_t)v;                       break;
1204                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1205                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1206                 }
1207             }
1208         }
1209     }
1210     emms_c();
1211
1212     if (score)
1213         score64 = score;
1214     if (s->frame_skip_exp < 0)
1215         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1216                       -1.0/s->frame_skip_exp);
1217
1218     if (score64 < s->frame_skip_threshold)
1219         return 1;
1220     if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1221         return 1;
1222     return 0;
1223 }
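/* Scoring summary: frame_skip_exp selects the norm (0 takes the maximum block
 * difference, 1 sums absolute values, 2 sums squares, ...), a negative value
 * additionally averages over the macroblocks and applies the -1/exp power,
 * and the frame is skipped when the score stays below frame_skip_threshold
 * or below (frame_skip_factor * lambda) >> 8. */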
1224
1225 static int encode_frame(AVCodecContext *c, AVFrame *frame, AVPacket *pkt)
1226 {
1227     int ret;
1228     int size = 0;
1229
1230     ret = avcodec_send_frame(c, frame);
1231     if (ret < 0)
1232         return ret;
1233
1234     do {
1235         ret = avcodec_receive_packet(c, pkt);
1236         if (ret >= 0) {
1237             size += pkt->size;
1238             av_packet_unref(pkt);
1239         } else if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
1240             return ret;
1241     } while (ret >= 0);
1242
1243     return size;
1244 }
1245
1246 static int estimate_best_b_count(MpegEncContext *s)
1247 {
1248     const AVCodec *codec = avcodec_find_encoder(s->avctx->codec_id);
1249     AVPacket *pkt;
1250     const int scale = s->brd_scale;
1251     int width  = s->width  >> scale;
1252     int height = s->height >> scale;
1253     int i, j, out_size, p_lambda, b_lambda, lambda2;
1254     int64_t best_rd  = INT64_MAX;
1255     int best_b_count = -1;
1256     int ret = 0;
1257
1258     av_assert0(scale >= 0 && scale <= 3);
1259
1260     pkt = av_packet_alloc();
1261     if (!pkt)
1262         return AVERROR(ENOMEM);
1263
1264     //emms_c();
1265     //s->next_picture_ptr->quality;
1266     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1267     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1268     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1269     if (!b_lambda) // FIXME we should do this somewhere else
1270         b_lambda = p_lambda;
1271     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1272                FF_LAMBDA_SHIFT;
1273
1274     for (i = 0; i < s->max_b_frames + 2; i++) {
1275         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1276                                                 s->next_picture_ptr;
1277         uint8_t *data[4];
1278
1279         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1280             pre_input = *pre_input_ptr;
1281             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1282
1283             if (!pre_input.shared && i) {
1284                 data[0] += INPLACE_OFFSET;
1285                 data[1] += INPLACE_OFFSET;
1286                 data[2] += INPLACE_OFFSET;
1287             }
1288
1289             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1290                                        s->tmp_frames[i]->linesize[0],
1291                                        data[0],
1292                                        pre_input.f->linesize[0],
1293                                        width, height);
1294             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1295                                        s->tmp_frames[i]->linesize[1],
1296                                        data[1],
1297                                        pre_input.f->linesize[1],
1298                                        width >> 1, height >> 1);
1299             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1300                                        s->tmp_frames[i]->linesize[2],
1301                                        data[2],
1302                                        pre_input.f->linesize[2],
1303                                        width >> 1, height >> 1);
1304         }
1305     }
1306
1307     for (j = 0; j < s->max_b_frames + 1; j++) {
1308         AVCodecContext *c;
1309         int64_t rd = 0;
1310
1311         if (!s->input_picture[j])
1312             break;
1313
1314         c = avcodec_alloc_context3(NULL);
1315         if (!c) {
1316             ret = AVERROR(ENOMEM);
1317             goto fail;
1318         }
1319
1320         c->width        = width;
1321         c->height       = height;
1322         c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1323         c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1324         c->mb_decision  = s->avctx->mb_decision;
1325         c->me_cmp       = s->avctx->me_cmp;
1326         c->mb_cmp       = s->avctx->mb_cmp;
1327         c->me_sub_cmp   = s->avctx->me_sub_cmp;
1328         c->pix_fmt      = AV_PIX_FMT_YUV420P;
1329         c->time_base    = s->avctx->time_base;
1330         c->max_b_frames = s->max_b_frames;
1331
1332         ret = avcodec_open2(c, codec, NULL);
1333         if (ret < 0)
1334             goto fail;
1335
1336
1337         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1338         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1339
1340         out_size = encode_frame(c, s->tmp_frames[0], pkt);
1341         if (out_size < 0) {
1342             ret = out_size;
1343             goto fail;
1344         }
1345
1346         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1347
1348         for (i = 0; i < s->max_b_frames + 1; i++) {
1349             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1350
1351             s->tmp_frames[i + 1]->pict_type = is_p ?
1352                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1353             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1354
1355             out_size = encode_frame(c, s->tmp_frames[i + 1], pkt);
1356             if (out_size < 0) {
1357                 ret = out_size;
1358                 goto fail;
1359             }
1360
1361             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1362         }
1363
1364         /* get the delayed frames */
1365         out_size = encode_frame(c, NULL, pkt);
1366         if (out_size < 0) {
1367             ret = out_size;
1368             goto fail;
1369         }
1370         rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1371
1372         rd += c->error[0] + c->error[1] + c->error[2];
1373
1374         if (rd < best_rd) {
1375             best_rd = rd;
1376             best_b_count = j;
1377         }
1378
1379 fail:
1380         avcodec_free_context(&c);
1381         av_packet_unref(pkt);
1382         if (ret < 0) {
1383             best_b_count = ret;
1384             break;
1385         }
1386     }
1387
1388     av_packet_free(&pkt);
1389
1390     return best_b_count;
1391 }
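/* Cost model used above: each candidate count j re-encodes the downscaled
 * frames (an I frame followed by B/P frames), accumulating for the non-intra
 * frames and the final flush
 *
 *     rd = sum((out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3))
 *        + c->error[0] + c->error[1] + c->error[2]
 *
 * i.e. bits weighted by the current B/P lambdas plus the SSE gathered via
 * AV_CODEC_FLAG_PSNR; the j with the smallest rd becomes the suggested number
 * of consecutive B-frames for b_frame_strategy == 2. */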
1392
1393 static int select_input_picture(MpegEncContext *s)
1394 {
1395     int i, ret;
1396
1397     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1398         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1399     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1400
1401     /* set next picture type & ordering */
1402     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1403         if (s->frame_skip_threshold || s->frame_skip_factor) {
1404             if (s->picture_in_gop_number < s->gop_size &&
1405                 s->next_picture_ptr &&
1406                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1407                 // FIXME check that the gop check above is +-1 correct
1408                 av_frame_unref(s->input_picture[0]->f);
1409
1410                 ff_vbv_update(s, 0);
1411
1412                 goto no_output_pic;
1413             }
1414         }
1415
1416         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1417             !s->next_picture_ptr || s->intra_only) {
1418             s->reordered_input_picture[0] = s->input_picture[0];
1419             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1420             s->reordered_input_picture[0]->f->coded_picture_number =
1421                 s->coded_picture_number++;
1422         } else {
1423             int b_frames = 0;
1424
1425             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1426                 for (i = 0; i < s->max_b_frames + 1; i++) {
1427                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1428
1429                     if (pict_num >= s->rc_context.num_entries)
1430                         break;
1431                     if (!s->input_picture[i]) {
1432                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1433                         break;
1434                     }
1435
1436                     s->input_picture[i]->f->pict_type =
1437                         s->rc_context.entry[pict_num].new_pict_type;
1438                 }
1439             }
1440
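            /* B-frame placement, depending on b_frame_strategy:
             *   0: always use max_b_frames (limited by how many inputs are buffered),
             *   1: heuristic - count intra-looking macroblocks between consecutive
             *      inputs (get_intra_count) and stop inserting B-frames once the
             *      change exceeds mb_num / b_sensitivity,
             *   2: brute force - trial-encode the candidate counts with a small
             *      helper encoder (estimate_best_b_count) and keep the cheapest. */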
1441             if (s->b_frame_strategy == 0) {
1442                 b_frames = s->max_b_frames;
1443                 while (b_frames && !s->input_picture[b_frames])
1444                     b_frames--;
1445             } else if (s->b_frame_strategy == 1) {
1446                 for (i = 1; i < s->max_b_frames + 1; i++) {
1447                     if (s->input_picture[i] &&
1448                         s->input_picture[i]->b_frame_score == 0) {
1449                         s->input_picture[i]->b_frame_score =
1450                             get_intra_count(s,
1451                                             s->input_picture[i    ]->f->data[0],
1452                                             s->input_picture[i - 1]->f->data[0],
1453                                             s->linesize) + 1;
1454                     }
1455                 }
1456                 for (i = 0; i < s->max_b_frames + 1; i++) {
1457                     if (!s->input_picture[i] ||
1458                         s->input_picture[i]->b_frame_score - 1 >
1459                             s->mb_num / s->b_sensitivity)
1460                         break;
1461                 }
1462
1463                 b_frames = FFMAX(0, i - 1);
1464
1465                 /* reset scores */
1466                 for (i = 0; i < b_frames + 1; i++) {
1467                     s->input_picture[i]->b_frame_score = 0;
1468                 }
1469             } else if (s->b_frame_strategy == 2) {
1470                 b_frames = estimate_best_b_count(s);
1471                 if (b_frames < 0)
1472                     return b_frames;
1473             }
1474
1475             emms_c();
1476
1477             for (i = b_frames - 1; i >= 0; i--) {
1478                 int type = s->input_picture[i]->f->pict_type;
1479                 if (type && type != AV_PICTURE_TYPE_B)
1480                     b_frames = i;
1481             }
1482             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1483                 b_frames == s->max_b_frames) {
1484                 av_log(s->avctx, AV_LOG_ERROR,
1485                        "warning, too many B-frames in a row\n");
1486             }
1487
1488             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1489                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1490                     s->gop_size > s->picture_in_gop_number) {
1491                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1492                 } else {
1493                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1494                         b_frames = 0;
1495                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1496                 }
1497             }
1498
1499             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1500                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1501                 b_frames--;
1502
1503             s->reordered_input_picture[0] = s->input_picture[b_frames];
1504             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1505                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1506             s->reordered_input_picture[0]->f->coded_picture_number =
1507                 s->coded_picture_number++;
1508             for (i = 0; i < b_frames; i++) {
1509                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1510                 s->reordered_input_picture[i + 1]->f->pict_type =
1511                     AV_PICTURE_TYPE_B;
1512                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1513                     s->coded_picture_number++;
1514             }
1515         }
1516     }
1517 no_output_pic:
1518     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1519
1520     if (s->reordered_input_picture[0]) {
1521         s->reordered_input_picture[0]->reference =
1522            s->reordered_input_picture[0]->f->pict_type !=
1523                AV_PICTURE_TYPE_B ? 3 : 0;
1524
1525         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1526             return ret;
1527
1528         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1529             // The input is a shared picture, so we can't modify it -> allocate a
1530             // new one and ensure that the shared one is reusable.
1531
1532             Picture *pic;
1533             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1534             if (i < 0)
1535                 return i;
1536             pic = &s->picture[i];
1537
1538             pic->reference = s->reordered_input_picture[0]->reference;
1539             if (alloc_picture(s, pic, 0) < 0) {
1540                 return -1;
1541             }
1542
1543             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1544             if (ret < 0)
1545                 return ret;
1546
1547             /* mark us unused / free shared pic */
1548             av_frame_unref(s->reordered_input_picture[0]->f);
1549             s->reordered_input_picture[0]->shared = 0;
1550
1551             s->current_picture_ptr = pic;
1552         } else {
1553             // input is not a shared pix -> reuse buffer for current_pix
1554             s->current_picture_ptr = s->reordered_input_picture[0];
1555             for (i = 0; i < 4; i++) {
1556                 if (s->new_picture.f->data[i])
1557                     s->new_picture.f->data[i] += INPLACE_OFFSET;
1558             }
1559         }
1560         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1561         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1562                                        s->current_picture_ptr)) < 0)
1563             return ret;
1564
1565         s->picture_number = s->new_picture.f->display_picture_number;
1566     }
1567     return 0;
1568 }
1569
1570 static void frame_end(MpegEncContext *s)
1571 {
1572     if (s->unrestricted_mv &&
1573         s->current_picture.reference &&
1574         !s->intra_only) {
1575         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1576         int hshift = desc->log2_chroma_w;
1577         int vshift = desc->log2_chroma_h;
1578         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1579                                 s->current_picture.f->linesize[0],
1580                                 s->h_edge_pos, s->v_edge_pos,
1581                                 EDGE_WIDTH, EDGE_WIDTH,
1582                                 EDGE_TOP | EDGE_BOTTOM);
1583         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1584                                 s->current_picture.f->linesize[1],
1585                                 s->h_edge_pos >> hshift,
1586                                 s->v_edge_pos >> vshift,
1587                                 EDGE_WIDTH >> hshift,
1588                                 EDGE_WIDTH >> vshift,
1589                                 EDGE_TOP | EDGE_BOTTOM);
1590         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1591                                 s->current_picture.f->linesize[2],
1592                                 s->h_edge_pos >> hshift,
1593                                 s->v_edge_pos >> vshift,
1594                                 EDGE_WIDTH >> hshift,
1595                                 EDGE_WIDTH >> vshift,
1596                                 EDGE_TOP | EDGE_BOTTOM);
1597     }
1598
1599     emms_c();
1600
1601     s->last_pict_type                 = s->pict_type;
1602     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1603     if (s->pict_type != AV_PICTURE_TYPE_B)
1604         s->last_non_b_pict_type = s->pict_type;
1605 }
1606
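/*
 * Refresh the per-coefficient noise-reduction offsets from the running DCT
 * statistics. The offsets are later subtracted from the coefficient
 * magnitudes in denoise_dct(): positions that are on average small get a
 * larger offset (stronger shrinkage towards zero), positions that usually
 * carry large coefficients get a smaller one. The counters are halved once
 * dct_count exceeds 2^16 so the statistics keep adapting.
 */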
1607 static void update_noise_reduction(MpegEncContext *s)
1608 {
1609     int intra, i;
1610
1611     for (intra = 0; intra < 2; intra++) {
1612         if (s->dct_count[intra] > (1 << 16)) {
1613             for (i = 0; i < 64; i++) {
1614                 s->dct_error_sum[intra][i] >>= 1;
1615             }
1616             s->dct_count[intra] >>= 1;
1617         }
1618
1619         for (i = 0; i < 64; i++) {
1620             s->dct_offset[intra][i] = (s->noise_reduction *
1621                                        s->dct_count[intra] +
1622                                        s->dct_error_sum[intra][i] / 2) /
1623                                       (s->dct_error_sum[intra][i] + 1);
1624         }
1625     }
1626 }
1627
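/*
 * Per-picture setup before encoding: rotate the reference pictures
 * (last <- next, next <- current for non-B frames), select the unquantize
 * functions matching the output format, and for field pictures double the
 * line sizes so that only one field is addressed. Also refreshes the noise
 * reduction tables when enabled.
 */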
1628 static int frame_start(MpegEncContext *s)
1629 {
1630     int ret;
1631
1632     /* mark & release old frames */
1633     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1634         s->last_picture_ptr != s->next_picture_ptr &&
1635         s->last_picture_ptr->f->buf[0]) {
1636         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1637     }
1638
1639     s->current_picture_ptr->f->pict_type = s->pict_type;
1640     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1641
1642     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1643     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1644                                    s->current_picture_ptr)) < 0)
1645         return ret;
1646
1647     if (s->pict_type != AV_PICTURE_TYPE_B) {
1648         s->last_picture_ptr = s->next_picture_ptr;
1649         if (!s->droppable)
1650             s->next_picture_ptr = s->current_picture_ptr;
1651     }
1652
1653     if (s->last_picture_ptr) {
1654         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1655         if (s->last_picture_ptr->f->buf[0] &&
1656             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1657                                        s->last_picture_ptr)) < 0)
1658             return ret;
1659     }
1660     if (s->next_picture_ptr) {
1661         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1662         if (s->next_picture_ptr->f->buf[0] &&
1663             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1664                                        s->next_picture_ptr)) < 0)
1665             return ret;
1666     }
1667
1668     if (s->picture_structure != PICT_FRAME) {
1669         int i;
1670         for (i = 0; i < 4; i++) {
1671             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1672                 s->current_picture.f->data[i] +=
1673                     s->current_picture.f->linesize[i];
1674             }
1675             s->current_picture.f->linesize[i] *= 2;
1676             s->last_picture.f->linesize[i]    *= 2;
1677             s->next_picture.f->linesize[i]    *= 2;
1678         }
1679     }
1680
1681     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1682         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1683         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1684     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1685         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1686         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1687     } else {
1688         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1689         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1690     }
1691
1692     if (s->dct_error_sum) {
1693         av_assert2(s->noise_reduction && s->encoding);
1694         update_noise_reduction(s);
1695     }
1696
1697     return 0;
1698 }
1699
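/*
 * Top-level per-frame entry point: buffer the new input frame, choose the
 * coding order, encode the picture (re-encoding with a larger lambda if the
 * VBV buffer would overflow), append any stuffing required by rate control,
 * patch vbv_delay for MPEG-1/2 CBR, and fill in the packet's pts/dts, flags
 * and side data.
 */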
1700 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1701                           const AVFrame *pic_arg, int *got_packet)
1702 {
1703     MpegEncContext *s = avctx->priv_data;
1704     int i, stuffing_count, ret;
1705     int context_count = s->slice_context_count;
1706
1707     s->vbv_ignore_qmax = 0;
1708
1709     s->picture_in_gop_number++;
1710
1711     if (load_input_picture(s, pic_arg) < 0)
1712         return -1;
1713
1714     if (select_input_picture(s) < 0) {
1715         return -1;
1716     }
1717
1718     /* output? */
1719     if (s->new_picture.f->data[0]) {
1720         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1721         int pkt_size = growing_buffer
1722                      ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1723                      : s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1724         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1725             return ret;
1726         if (s->mb_info) {
1727             s->mb_info_ptr = av_packet_new_side_data(pkt,
1728                                  AV_PKT_DATA_H263_MB_INFO,
1729                                  s->mb_width*s->mb_height*12);
1730             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1731         }
1732
1733         for (i = 0; i < context_count; i++) {
1734             int start_y = s->thread_context[i]->start_mb_y;
1735             int   end_y = s->thread_context[i]->  end_mb_y;
1736             int h       = s->mb_height;
1737             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1738             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1739
1740             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1741         }
1742
1743         s->pict_type = s->new_picture.f->pict_type;
1744         //emms_c();
1745         ret = frame_start(s);
1746         if (ret < 0)
1747             return ret;
1748 vbv_retry:
1749         ret = encode_picture(s, s->picture_number);
1750         if (growing_buffer) {
1751             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1752             pkt->data = s->pb.buf;
1753             pkt->size = avctx->internal->byte_buffer_size;
1754         }
1755         if (ret < 0)
1756             return -1;
1757
1758         frame_end(s);
1759
1760         if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) && s->out_format == FMT_MJPEG)
1761             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1762
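        /* VBV check: if the coded frame is too large for the rate-control
         * buffer, raise lambda (and the per-MB lambda table), undo the
         * per-frame state that encoding changed, reset the bit writers and
         * jump back to vbv_retry to encode the frame again. */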
1763         if (avctx->rc_buffer_size) {
1764             RateControlContext *rcc = &s->rc_context;
1765             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1766             int hq = (avctx->mb_decision == FF_MB_DECISION_RD || avctx->trellis);
1767             int min_step = hq ? 1 : (1<<(FF_LAMBDA_SHIFT + 7))/139;
1768
1769             if (put_bits_count(&s->pb) > max_size &&
1770                 s->lambda < s->lmax) {
1771                 s->next_lambda = FFMAX(s->lambda + min_step, s->lambda *
1772                                        (s->qscale + 1) / s->qscale);
1773                 if (s->adaptive_quant) {
1774                     int i;
1775                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1776                         s->lambda_table[i] =
1777                             FFMAX(s->lambda_table[i] + min_step,
1778                                   s->lambda_table[i] * (s->qscale + 1) /
1779                                   s->qscale);
1780                 }
1781                 s->mb_skipped = 0;        // also done in frame_start()
1782                 // the following was done in encode_picture(), so we must undo it here
1783                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1784                     if (s->flipflop_rounding          ||
1785                         s->codec_id == AV_CODEC_ID_H263P ||
1786                         s->codec_id == AV_CODEC_ID_MPEG4)
1787                         s->no_rounding ^= 1;
1788                 }
1789                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1790                     s->time_base       = s->last_time_base;
1791                     s->last_non_b_time = s->time - s->pp_time;
1792                 }
1793                 for (i = 0; i < context_count; i++) {
1794                     PutBitContext *pb = &s->thread_context[i]->pb;
1795                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1796                 }
1797                 s->vbv_ignore_qmax = 1;
1798                 av_log(avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1799                 goto vbv_retry;
1800             }
1801
1802             av_assert0(avctx->rc_max_rate);
1803         }
1804
1805         if (avctx->flags & AV_CODEC_FLAG_PASS1)
1806             ff_write_pass1_stats(s);
1807
1808         for (i = 0; i < 4; i++) {
1809             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1810             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1811         }
1812         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1813                                        s->current_picture_ptr->encoding_error,
1814                                        (avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
1815                                        s->pict_type);
1816
1817         if (avctx->flags & AV_CODEC_FLAG_PASS1)
1818             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1819                                              s->misc_bits + s->i_tex_bits +
1820                                              s->p_tex_bits);
1821         flush_put_bits(&s->pb);
1822         s->frame_bits  = put_bits_count(&s->pb);
1823
1824         stuffing_count = ff_vbv_update(s, s->frame_bits);
1825         s->stuffing_bits = 8*stuffing_count;
1826         if (stuffing_count) {
1827             if (put_bytes_left(&s->pb, 0) < stuffing_count + 50) {
1828                 av_log(avctx, AV_LOG_ERROR, "stuffing too large\n");
1829                 return -1;
1830             }
1831
1832             switch (s->codec_id) {
1833             case AV_CODEC_ID_MPEG1VIDEO:
1834             case AV_CODEC_ID_MPEG2VIDEO:
1835                 while (stuffing_count--) {
1836                     put_bits(&s->pb, 8, 0);
1837                 }
1838             break;
1839             case AV_CODEC_ID_MPEG4:
1840                 put_bits(&s->pb, 16, 0);
1841                 put_bits(&s->pb, 16, 0x1C3);
1842                 stuffing_count -= 4;
1843                 while (stuffing_count--) {
1844                     put_bits(&s->pb, 8, 0xFF);
1845                 }
1846             break;
1847             default:
1848                 av_log(avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1849             }
1850             flush_put_bits(&s->pb);
1851             s->frame_bits  = put_bits_count(&s->pb);
1852         }
1853
1854         /* update MPEG-1/2 vbv_delay for CBR */
1855         if (avctx->rc_max_rate                          &&
1856             avctx->rc_min_rate == avctx->rc_max_rate &&
1857             s->out_format == FMT_MPEG1                     &&
1858             90000LL * (avctx->rc_buffer_size - 1) <=
1859                 avctx->rc_max_rate * 0xFFFFLL) {
1860             AVCPBProperties *props;
1861             size_t props_size;
1862
1863             int vbv_delay, min_delay;
1864             double inbits  = avctx->rc_max_rate *
1865                              av_q2d(avctx->time_base);
1866             int    minbits = s->frame_bits - 8 *
1867                              (s->vbv_delay_ptr - s->pb.buf - 1);
1868             double bits    = s->rc_context.buffer_index + minbits - inbits;
1869
1870             if (bits < 0)
1871                 av_log(avctx, AV_LOG_ERROR,
1872                        "Internal error, negative bits\n");
1873
1874             av_assert1(s->repeat_first_field == 0);
1875
1876             vbv_delay = bits * 90000 / avctx->rc_max_rate;
1877             min_delay = (minbits * 90000LL + avctx->rc_max_rate - 1) /
1878                         avctx->rc_max_rate;
1879
1880             vbv_delay = FFMAX(vbv_delay, min_delay);
1881
1882             av_assert0(vbv_delay < 0xFFFF);
1883
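            /* Splice the 16-bit vbv_delay into the picture header that has
             * already been written: bits 15..13 go into the low 3 bits of
             * vbv_delay_ptr[0], bits 12..5 into vbv_delay_ptr[1], and bits
             * 4..0 into the high 5 bits of vbv_delay_ptr[2]. */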
1884             s->vbv_delay_ptr[0] &= 0xF8;
1885             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1886             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1887             s->vbv_delay_ptr[2] &= 0x07;
1888             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1889
1890             props = av_cpb_properties_alloc(&props_size);
1891             if (!props)
1892                 return AVERROR(ENOMEM);
1893             props->vbv_delay = vbv_delay * 300;
1894
1895             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1896                                           (uint8_t*)props, props_size);
1897             if (ret < 0) {
1898                 av_freep(&props);
1899                 return ret;
1900             }
1901         }
1902         s->total_bits     += s->frame_bits;
1903
1904         pkt->pts = s->current_picture.f->pts;
1905         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1906             if (!s->current_picture.f->coded_picture_number)
1907                 pkt->dts = pkt->pts - s->dts_delta;
1908             else
1909                 pkt->dts = s->reordered_pts;
1910             s->reordered_pts = pkt->pts;
1911         } else
1912             pkt->dts = pkt->pts;
1913         if (s->current_picture.f->key_frame)
1914             pkt->flags |= AV_PKT_FLAG_KEY;
1915         if (s->mb_info)
1916             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1917     } else {
1918         s->frame_bits = 0;
1919     }
1920
1921     /* release non-reference frames */
1922     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1923         if (!s->picture[i].reference)
1924             ff_mpeg_unref_picture(avctx, &s->picture[i]);
1925     }
1926
1927     av_assert1((s->frame_bits & 7) == 0);
1928
1929     pkt->size = s->frame_bits / 8;
1930     *got_packet = !!pkt->size;
1931     return 0;
1932 }
1933
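/*
 * Zero out blocks that contain only a few isolated +-1 coefficients.
 * Each +-1 contributes a score weighted by the length of the zero run in
 * front of it (tab[]); blocks whose total score stays below the threshold
 * are not worth coding and are cleared. A negative threshold means the DC
 * coefficient may be eliminated as well.
 */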
1934 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1935                                                 int n, int threshold)
1936 {
1937     static const char tab[64] = {
1938         3, 2, 2, 1, 1, 1, 1, 1,
1939         1, 1, 1, 1, 1, 1, 1, 1,
1940         1, 1, 1, 1, 1, 1, 1, 1,
1941         0, 0, 0, 0, 0, 0, 0, 0,
1942         0, 0, 0, 0, 0, 0, 0, 0,
1943         0, 0, 0, 0, 0, 0, 0, 0,
1944         0, 0, 0, 0, 0, 0, 0, 0,
1945         0, 0, 0, 0, 0, 0, 0, 0
1946     };
1947     int score = 0;
1948     int run = 0;
1949     int i;
1950     int16_t *block = s->block[n];
1951     const int last_index = s->block_last_index[n];
1952     int skip_dc;
1953
1954     if (threshold < 0) {
1955         skip_dc = 0;
1956         threshold = -threshold;
1957     } else
1958         skip_dc = 1;
1959
1960     /* Are all the coefficients we could set to zero already zero? */
1961     if (last_index <= skip_dc - 1)
1962         return;
1963
1964     for (i = 0; i <= last_index; i++) {
1965         const int j = s->intra_scantable.permutated[i];
1966         const int level = FFABS(block[j]);
1967         if (level == 1) {
1968             if (skip_dc && i == 0)
1969                 continue;
1970             score += tab[run];
1971             run = 0;
1972         } else if (level > 1) {
1973             return;
1974         } else {
1975             run++;
1976         }
1977     }
1978     if (score >= threshold)
1979         return;
1980     for (i = skip_dc; i <= last_index; i++) {
1981         const int j = s->intra_scantable.permutated[i];
1982         block[j] = 0;
1983     }
1984     if (block[0])
1985         s->block_last_index[n] = 0;
1986     else
1987         s->block_last_index[n] = -1;
1988 }
1989
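/*
 * Clamp the quantized coefficients to the range the codec can represent
 * ([min_qcoeff, max_qcoeff]); the intra DC coefficient is left untouched.
 * The warning is only printed for FF_MB_DECISION_SIMPLE.
 */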
1990 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1991                                int last_index)
1992 {
1993     int i;
1994     const int maxlevel = s->max_qcoeff;
1995     const int minlevel = s->min_qcoeff;
1996     int overflow = 0;
1997
1998     if (s->mb_intra) {
1999         i = 1; // skip clipping of intra dc
2000     } else
2001         i = 0;
2002
2003     for (; i <= last_index; i++) {
2004         const int j = s->intra_scantable.permutated[i];
2005         int level = block[j];
2006
2007         if (level > maxlevel) {
2008             level = maxlevel;
2009             overflow++;
2010         } else if (level < minlevel) {
2011             level = minlevel;
2012             overflow++;
2013         }
2014
2015         block[j] = level;
2016     }
2017
2018     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2019         av_log(s->avctx, AV_LOG_INFO,
2020                "warning, clipping %d dct coefficients to %d..%d\n",
2021                overflow, minlevel, maxlevel);
2022 }
2023
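/*
 * Compute a per-pixel activity weight from the 3x3 neighbourhood
 * (a standard-deviation-like measure, scaled by 36 / count);
 * dct_quantize_refine() uses these weights when refining the quantized
 * coefficients (noise shaping).
 */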
2024 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
2025 {
2026     int x, y;
2027     // FIXME optimize
2028     for (y = 0; y < 8; y++) {
2029         for (x = 0; x < 8; x++) {
2030             int x2, y2;
2031             int sum = 0;
2032             int sqr = 0;
2033             int count = 0;
2034
2035             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
2036                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
2037                     int v = ptr[x2 + y2 * stride];
2038                     sum += v;
2039                     sqr += v * v;
2040                     count++;
2041                 }
2042             }
2043             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
2044         }
2045     }
2046 }
2047
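/*
 * Encode one macroblock: fetch (or motion-compensate) the source pixels,
 * optionally choose interlaced DCT, transform and quantize the blocks
 * (skipping blocks known to quantize to zero), apply coefficient elimination
 * and noise shaping, and finally emit the bitstream for the selected codec.
 * mb_block_count is 6/8/12 for 4:2:0 / 4:2:2 / 4:4:4.
 */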
2048 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2049                                                 int motion_x, int motion_y,
2050                                                 int mb_block_height,
2051                                                 int mb_block_width,
2052                                                 int mb_block_count)
2053 {
2054     int16_t weight[12][64];
2055     int16_t orig[12][64];
2056     const int mb_x = s->mb_x;
2057     const int mb_y = s->mb_y;
2058     int i;
2059     int skip_dct[12];
2060     int dct_offset = s->linesize * 8; // default for progressive frames
2061     int uv_dct_offset = s->uvlinesize * 8;
2062     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2063     ptrdiff_t wrap_y, wrap_c;
2064
2065     for (i = 0; i < mb_block_count; i++)
2066         skip_dct[i] = s->skipdct;
2067
2068     if (s->adaptive_quant) {
2069         const int last_qp = s->qscale;
2070         const int mb_xy = mb_x + mb_y * s->mb_stride;
2071
2072         s->lambda = s->lambda_table[mb_xy];
2073         update_qscale(s);
2074
2075         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2076             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2077             s->dquant = s->qscale - last_qp;
2078
2079             if (s->out_format == FMT_H263) {
2080                 s->dquant = av_clip(s->dquant, -2, 2);
2081
2082                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2083                     if (!s->mb_intra) {
2084                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2085                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2086                                 s->dquant = 0;
2087                         }
2088                         if (s->mv_type == MV_TYPE_8X8)
2089                             s->dquant = 0;
2090                     }
2091                 }
2092             }
2093         }
2094         ff_set_qscale(s, last_qp + s->dquant);
2095     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2096         ff_set_qscale(s, s->qscale + s->dquant);
2097
2098     wrap_y = s->linesize;
2099     wrap_c = s->uvlinesize;
2100     ptr_y  = s->new_picture.f->data[0] +
2101              (mb_y * 16 * wrap_y)              + mb_x * 16;
2102     ptr_cb = s->new_picture.f->data[1] +
2103              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2104     ptr_cr = s->new_picture.f->data[2] +
2105              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2106
2107     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2108         uint8_t *ebuf = s->sc.edge_emu_buffer + 38 * wrap_y;
2109         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2110         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2111         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2112                                  wrap_y, wrap_y,
2113                                  16, 16, mb_x * 16, mb_y * 16,
2114                                  s->width, s->height);
2115         ptr_y = ebuf;
2116         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2117                                  wrap_c, wrap_c,
2118                                  mb_block_width, mb_block_height,
2119                                  mb_x * mb_block_width, mb_y * mb_block_height,
2120                                  cw, ch);
2121         ptr_cb = ebuf + 16 * wrap_y;
2122         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2123                                  wrap_c, wrap_c,
2124                                  mb_block_width, mb_block_height,
2125                                  mb_x * mb_block_width, mb_y * mb_block_height,
2126                                  cw, ch);
2127         ptr_cr = ebuf + 16 * wrap_y + 16;
2128     }
2129
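    /* In both the intra and the inter path below, AV_CODEC_FLAG_INTERLACED_DCT
     * triggers a frame-vs-field DCT decision: the ildct_cmp metric scores the
     * rows as a frame against the two fields transformed with doubled stride;
     * if the field version scores better, interlaced_dct is set and the block
     * offsets/strides are adjusted accordingly. */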
2130     if (s->mb_intra) {
2131         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2132             int progressive_score, interlaced_score;
2133
2134             s->interlaced_dct = 0;
2135             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2136                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2137                                                      NULL, wrap_y, 8) - 400;
2138
2139             if (progressive_score > 0) {
2140                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2141                                                         NULL, wrap_y * 2, 8) +
2142                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2143                                                         NULL, wrap_y * 2, 8);
2144                 if (progressive_score > interlaced_score) {
2145                     s->interlaced_dct = 1;
2146
2147                     dct_offset = wrap_y;
2148                     uv_dct_offset = wrap_c;
2149                     wrap_y <<= 1;
2150                     if (s->chroma_format == CHROMA_422 ||
2151                         s->chroma_format == CHROMA_444)
2152                         wrap_c <<= 1;
2153                 }
2154             }
2155         }
2156
2157         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2158         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2159         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2160         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2161
2162         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2163             skip_dct[4] = 1;
2164             skip_dct[5] = 1;
2165         } else {
2166             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2167             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2168             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2169                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2170                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2171             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2172                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2173                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2174                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2175                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2176                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2177                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2178             }
2179         }
2180     } else {
2181         op_pixels_func (*op_pix)[4];
2182         qpel_mc_func (*op_qpix)[16];
2183         uint8_t *dest_y, *dest_cb, *dest_cr;
2184
2185         dest_y  = s->dest[0];
2186         dest_cb = s->dest[1];
2187         dest_cr = s->dest[2];
2188
2189         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2190             op_pix  = s->hdsp.put_pixels_tab;
2191             op_qpix = s->qdsp.put_qpel_pixels_tab;
2192         } else {
2193             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2194             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2195         }
2196
2197         if (s->mv_dir & MV_DIR_FORWARD) {
2198             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2199                           s->last_picture.f->data,
2200                           op_pix, op_qpix);
2201             op_pix  = s->hdsp.avg_pixels_tab;
2202             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2203         }
2204         if (s->mv_dir & MV_DIR_BACKWARD) {
2205             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2206                           s->next_picture.f->data,
2207                           op_pix, op_qpix);
2208         }
2209
2210         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2211             int progressive_score, interlaced_score;
2212
2213             s->interlaced_dct = 0;
2214             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2215                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2216                                                      ptr_y + wrap_y * 8,
2217                                                      wrap_y, 8) - 400;
2218
2219             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2220                 progressive_score -= 400;
2221
2222             if (progressive_score > 0) {
2223                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2224                                                         wrap_y * 2, 8) +
2225                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2226                                                         ptr_y + wrap_y,
2227                                                         wrap_y * 2, 8);
2228
2229                 if (progressive_score > interlaced_score) {
2230                     s->interlaced_dct = 1;
2231
2232                     dct_offset = wrap_y;
2233                     uv_dct_offset = wrap_c;
2234                     wrap_y <<= 1;
2235                     if (s->chroma_format == CHROMA_422)
2236                         wrap_c <<= 1;
2237                 }
2238             }
2239         }
2240
2241         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2242         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2243         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2244                             dest_y + dct_offset, wrap_y);
2245         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2246                             dest_y + dct_offset + 8, wrap_y);
2247
2248         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2249             skip_dct[4] = 1;
2250             skip_dct[5] = 1;
2251         } else {
2252             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2253             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2254             if (!s->chroma_y_shift) { /* 422 */
2255                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2256                                     dest_cb + uv_dct_offset, wrap_c);
2257                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2258                                     dest_cr + uv_dct_offset, wrap_c);
2259             }
2260         }
2261         /* pre-quantization: skip the DCT of blocks whose motion-compensated error is small enough to quantize to zero anyway */
2262         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2263                 2 * s->qscale * s->qscale) {
2264             // FIXME optimize
2265             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2266                 skip_dct[0] = 1;
2267             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2268                 skip_dct[1] = 1;
2269             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2270                                wrap_y, 8) < 20 * s->qscale)
2271                 skip_dct[2] = 1;
2272             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2273                                wrap_y, 8) < 20 * s->qscale)
2274                 skip_dct[3] = 1;
2275             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2276                 skip_dct[4] = 1;
2277             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2278                 skip_dct[5] = 1;
2279             if (!s->chroma_y_shift) { /* 422 */
2280                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2281                                    dest_cb + uv_dct_offset,
2282                                    wrap_c, 8) < 20 * s->qscale)
2283                     skip_dct[6] = 1;
2284                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2285                                    dest_cr + uv_dct_offset,
2286                                    wrap_c, 8) < 20 * s->qscale)
2287                     skip_dct[7] = 1;
2288             }
2289         }
2290     }
2291
2292     if (s->quantizer_noise_shaping) {
2293         if (!skip_dct[0])
2294             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2295         if (!skip_dct[1])
2296             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2297         if (!skip_dct[2])
2298             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2299         if (!skip_dct[3])
2300             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2301         if (!skip_dct[4])
2302             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2303         if (!skip_dct[5])
2304             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2305         if (!s->chroma_y_shift) { /* 422 */
2306             if (!skip_dct[6])
2307                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2308                                   wrap_c);
2309             if (!skip_dct[7])
2310                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2311                                   wrap_c);
2312         }
2313         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2314     }
2315
2316     /* DCT & quantize */
2317     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2318     {
2319         for (i = 0; i < mb_block_count; i++) {
2320             if (!skip_dct[i]) {
2321                 int overflow;
2322                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2323                 // FIXME we could decide to change the quantizer instead of
2324                 // clipping
2325                 // JS: I don't think that would be a good idea, it could lower
2326                 //     quality instead of improving it. Only INTRADC clipping
2327                 //     deserves a change of quantizer.
2328                 if (overflow)
2329                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2330             } else
2331                 s->block_last_index[i] = -1;
2332         }
2333         if (s->quantizer_noise_shaping) {
2334             for (i = 0; i < mb_block_count; i++) {
2335                 if (!skip_dct[i]) {
2336                     s->block_last_index[i] =
2337                         dct_quantize_refine(s, s->block[i], weight[i],
2338                                             orig[i], i, s->qscale);
2339                 }
2340             }
2341         }
2342
2343         if (s->luma_elim_threshold && !s->mb_intra)
2344             for (i = 0; i < 4; i++)
2345                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2346         if (s->chroma_elim_threshold && !s->mb_intra)
2347             for (i = 4; i < mb_block_count; i++)
2348                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2349
2350         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2351             for (i = 0; i < mb_block_count; i++) {
2352                 if (s->block_last_index[i] == -1)
2353                     s->coded_score[i] = INT_MAX / 256;
2354             }
2355         }
2356     }
2357
2358     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2359         s->block_last_index[4] =
2360         s->block_last_index[5] = 0;
2361         s->block[4][0] =
2362         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2363         if (!s->chroma_y_shift) { /* 422 / 444 */
2364             for (i=6; i<12; i++) {
2365                 s->block_last_index[i] = 0;
2366                 s->block[i][0] = s->block[4][0];
2367             }
2368         }
2369     }
2370
2371     // FIXME: the non-C quantize code returns an incorrect block_last_index
2372     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2373         for (i = 0; i < mb_block_count; i++) {
2374             int j;
2375             if (s->block_last_index[i] > 0) {
2376                 for (j = 63; j > 0; j--) {
2377                     if (s->block[i][s->intra_scantable.permutated[j]])
2378                         break;
2379                 }
2380                 s->block_last_index[i] = j;
2381             }
2382         }
2383     }
2384
2385     /* huffman encode */
2386     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2387     case AV_CODEC_ID_MPEG1VIDEO:
2388     case AV_CODEC_ID_MPEG2VIDEO:
2389         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2390             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2391         break;
2392     case AV_CODEC_ID_MPEG4:
2393         if (CONFIG_MPEG4_ENCODER)
2394             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2395         break;
2396     case AV_CODEC_ID_MSMPEG4V2:
2397     case AV_CODEC_ID_MSMPEG4V3:
2398     case AV_CODEC_ID_WMV1:
2399         if (CONFIG_MSMPEG4_ENCODER)
2400             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2401         break;
2402     case AV_CODEC_ID_WMV2:
2403         if (CONFIG_WMV2_ENCODER)
2404             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2405         break;
2406     case AV_CODEC_ID_H261:
2407         if (CONFIG_H261_ENCODER)
2408             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2409         break;
2410     case AV_CODEC_ID_H263:
2411     case AV_CODEC_ID_H263P:
2412     case AV_CODEC_ID_FLV1:
2413     case AV_CODEC_ID_RV10:
2414     case AV_CODEC_ID_RV20:
2415         if (CONFIG_H263_ENCODER)
2416             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2417         break;
2418 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
2419     case AV_CODEC_ID_MJPEG:
2420     case AV_CODEC_ID_AMV:
2421         ff_mjpeg_encode_mb(s, s->block);
2422         break;
2423 #endif
2424     case AV_CODEC_ID_SPEEDHQ:
2425         if (CONFIG_SPEEDHQ_ENCODER)
2426             ff_speedhq_encode_mb(s, s->block);
2427         break;
2428     default:
2429         av_assert1(0);
2430     }
2431 }
2432
2433 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2434 {
2435     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2436     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2437     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2438 }
2439
2440 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2441     int i;
2442
2443     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2444
2445     /* MPEG-1 */
2446     d->mb_skip_run= s->mb_skip_run;
2447     for(i=0; i<3; i++)
2448         d->last_dc[i] = s->last_dc[i];
2449
2450     /* statistics */
2451     d->mv_bits= s->mv_bits;
2452     d->i_tex_bits= s->i_tex_bits;
2453     d->p_tex_bits= s->p_tex_bits;
2454     d->i_count= s->i_count;
2455     d->f_count= s->f_count;
2456     d->b_count= s->b_count;
2457     d->skip_count= s->skip_count;
2458     d->misc_bits= s->misc_bits;
2459     d->last_bits= 0;
2460
2461     d->mb_skipped= 0;
2462     d->qscale= s->qscale;
2463     d->dquant= s->dquant;
2464
2465     d->esc3_level_length= s->esc3_level_length;
2466 }
2467
2468 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2469     int i;
2470
2471     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2472     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2473
2474     /* MPEG-1 */
2475     d->mb_skip_run= s->mb_skip_run;
2476     for(i=0; i<3; i++)
2477         d->last_dc[i] = s->last_dc[i];
2478
2479     /* statistics */
2480     d->mv_bits= s->mv_bits;
2481     d->i_tex_bits= s->i_tex_bits;
2482     d->p_tex_bits= s->p_tex_bits;
2483     d->i_count= s->i_count;
2484     d->f_count= s->f_count;
2485     d->b_count= s->b_count;
2486     d->skip_count= s->skip_count;
2487     d->misc_bits= s->misc_bits;
2488
2489     d->mb_intra= s->mb_intra;
2490     d->mb_skipped= s->mb_skipped;
2491     d->mv_type= s->mv_type;
2492     d->mv_dir= s->mv_dir;
2493     d->pb= s->pb;
2494     if(s->data_partitioning){
2495         d->pb2= s->pb2;
2496         d->tex_pb= s->tex_pb;
2497     }
2498     d->block= s->block;
2499     for(i=0; i<8; i++)
2500         d->block_last_index[i]= s->block_last_index[i];
2501     d->interlaced_dct= s->interlaced_dct;
2502     d->qscale= s->qscale;
2503
2504     d->esc3_level_length= s->esc3_level_length;
2505 }
2506
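/*
 * Trial-encode one macroblock coding mode for RD macroblock decision: the MB
 * is written into one of two alternating scratch bit/block buffers and scored
 * by the number of bits used (or, for FF_MB_DECISION_RD, by bits * lambda2
 * plus the SSE of the reconstruction); the context of the best candidate so
 * far is kept in *best.
 */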
2507 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2508                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2509                            int *dmin, int *next_block, int motion_x, int motion_y)
2510 {
2511     int score;
2512     uint8_t *dest_backup[3];
2513
2514     copy_context_before_encode(s, backup, type);
2515
2516     s->block= s->blocks[*next_block];
2517     s->pb= pb[*next_block];
2518     if(s->data_partitioning){
2519         s->pb2   = pb2   [*next_block];
2520         s->tex_pb= tex_pb[*next_block];
2521     }
2522
2523     if(*next_block){
2524         memcpy(dest_backup, s->dest, sizeof(s->dest));
2525         s->dest[0] = s->sc.rd_scratchpad;
2526         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2527         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2528         av_assert0(s->linesize >= 32); //FIXME
2529     }
2530
2531     encode_mb(s, motion_x, motion_y);
2532
2533     score= put_bits_count(&s->pb);
2534     if(s->data_partitioning){
2535         score+= put_bits_count(&s->pb2);
2536         score+= put_bits_count(&s->tex_pb);
2537     }
2538
2539     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2540         ff_mpv_reconstruct_mb(s, s->block);
2541
2542         score *= s->lambda2;
2543         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2544     }
2545
2546     if(*next_block){
2547         memcpy(s->dest, dest_backup, sizeof(s->dest));
2548     }
2549
2550     if(score<*dmin){
2551         *dmin= score;
2552         *next_block^=1;
2553
2554         copy_context_after_encode(best, s, type);
2555     }
2556 }
2557
2558 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2559     const uint32_t *sq = ff_square_tab + 256;
2560     int acc=0;
2561     int x,y;
2562
2563     if(w==16 && h==16)
2564         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2565     else if(w==8 && h==8)
2566         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2567
2568     for(y=0; y<h; y++){
2569         for(x=0; x<w; x++){
2570             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2571         }
2572     }
2573
2574     av_assert2(acc>=0);
2575
2576     return acc;
2577 }
2578
2579 static int sse_mb(MpegEncContext *s){
2580     int w= 16;
2581     int h= 16;
2582
2583     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2584     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2585
2586     if(w==16 && h==16)
2587       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2588         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2589                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2590                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2591       }else{
2592         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2593                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2594                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2595       }
2596     else
2597         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2598                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2599                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2600 }
2601
2602 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2603     MpegEncContext *s= *(void**)arg;
2604
2605
2606     s->me.pre_pass=1;
2607     s->me.dia_size= s->avctx->pre_dia_size;
2608     s->first_slice_line=1;
2609     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2610         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2611             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2612         }
2613         s->first_slice_line=0;
2614     }
2615
2616     s->me.pre_pass=0;
2617
2618     return 0;
2619 }
2620
2621 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2622     MpegEncContext *s= *(void**)arg;
2623
2624     s->me.dia_size= s->avctx->dia_size;
2625     s->first_slice_line=1;
2626     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2627         s->mb_x=0; //for block init below
2628         ff_init_block_index(s);
2629         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2630             s->block_index[0]+=2;
2631             s->block_index[1]+=2;
2632             s->block_index[2]+=2;
2633             s->block_index[3]+=2;
2634
2635             /* compute motion vector & mb_type and store in context */
2636             if(s->pict_type==AV_PICTURE_TYPE_B)
2637                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2638             else
2639                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2640         }
2641         s->first_slice_line=0;
2642     }
2643     return 0;
2644 }
2645
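/*
 * Per-slice pass that computes the spatial variance (mb_var) and mean luma
 * (mb_mean) of every 16x16 macroblock of the source picture; these feed
 * adaptive quantization and rate control.
 */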
2646 static int mb_var_thread(AVCodecContext *c, void *arg){
2647     MpegEncContext *s= *(void**)arg;
2648     int mb_x, mb_y;
2649
2650     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2651         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2652             int xx = mb_x * 16;
2653             int yy = mb_y * 16;
2654             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2655             int varc;
2656             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2657
2658             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2659                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2660
2661             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2662             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2663             s->me.mb_var_sum_temp    += varc;
2664         }
2665     }
2666     return 0;
2667 }
2668
2669 static void write_slice_end(MpegEncContext *s){
2670     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2671         if(s->partitioned_frame){
2672             ff_mpeg4_merge_partitions(s);
2673         }
2674
2675         ff_mpeg4_stuffing(&s->pb);
2676     } else if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
2677                s->out_format == FMT_MJPEG) {
2678         ff_mjpeg_encode_stuffing(s);
2679     } else if (CONFIG_SPEEDHQ_ENCODER && s->out_format == FMT_SPEEDHQ) {
2680         ff_speedhq_end_slice(s);
2681     }
2682
2683     flush_put_bits(&s->pb);
2684
2685     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2686         s->misc_bits+= get_bits_diff(s);
2687 }
2688
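/*
 * Append one 12-byte entry to the AV_PKT_DATA_H263_MB_INFO side data:
 * 32-bit little-endian bit offset of the macroblock, quantizer, GOB number,
 * 16-bit macroblock address, the predicted motion vector (hmv1/vmv1) and a
 * zero second vector (4MV is not implemented).
 */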
2689 static void write_mb_info(MpegEncContext *s)
2690 {
2691     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2692     int offset = put_bits_count(&s->pb);
2693     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2694     int gobn = s->mb_y / s->gob_index;
2695     int pred_x, pred_y;
2696     if (CONFIG_H263_ENCODER)
2697         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2698     bytestream_put_le32(&ptr, offset);
2699     bytestream_put_byte(&ptr, s->qscale);
2700     bytestream_put_byte(&ptr, gobn);
2701     bytestream_put_le16(&ptr, mba);
2702     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2703     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2704     /* 4MV not implemented */
2705     bytestream_put_byte(&ptr, 0); /* hmv2 */
2706     bytestream_put_byte(&ptr, 0); /* vmv2 */
2707 }
2708
2709 static void update_mb_info(MpegEncContext *s, int startcode)
2710 {
2711     if (!s->mb_info)
2712         return;
2713     if (put_bytes_count(&s->pb, 0) - s->prev_mb_info >= s->mb_info) {
2714         s->mb_info_size += 12;
2715         s->prev_mb_info = s->last_mb_info;
2716     }
2717     if (startcode) {
2718         s->prev_mb_info = put_bytes_count(&s->pb, 0);
2719         /* This might have incremented mb_info_size above, and we return without
2720          * actually writing any info into that slot yet. But in that case,
2721          * this function will be called again right after the start code has
2722          * been written, and the mb info will be written then. */
2723         return;
2724     }
2725
2726     s->last_mb_info = put_bytes_count(&s->pb, 0);
2727     if (!s->mb_info_size)
2728         s->mb_info_size += 12;
2729     write_mb_info(s);
2730 }
2731
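/*
 * Grow the shared output buffer when fewer than `threshold` bytes are left,
 * but only when there is a single slice context writing directly into
 * avctx->internal->byte_buffer. The PutBitContext is rebased onto the new
 * buffer and the saved pointers into the old one (ptr_lastgob, vbv_delay_ptr)
 * are re-derived.
 */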
2732 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2733 {
2734     if (put_bytes_left(&s->pb, 0) < threshold
2735         && s->slice_context_count == 1
2736         && s->pb.buf == s->avctx->internal->byte_buffer) {
2737         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2738         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2739
2740         uint8_t *new_buffer = NULL;
2741         int new_buffer_size = 0;
2742
2743         if ((s->avctx->internal->byte_buffer_size + size_increase) >= INT_MAX/8) {
2744             av_log(s->avctx, AV_LOG_ERROR, "Cannot reallocate putbit buffer\n");
2745             return AVERROR(ENOMEM);
2746         }
2747
2748         emms_c();
2749
2750         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2751                               s->avctx->internal->byte_buffer_size + size_increase);
2752         if (!new_buffer)
2753             return AVERROR(ENOMEM);
2754
2755         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2756         av_free(s->avctx->internal->byte_buffer);
2757         s->avctx->internal->byte_buffer      = new_buffer;
2758         s->avctx->internal->byte_buffer_size = new_buffer_size;
2759         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2760         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2761         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2762     }
2763     if (put_bytes_left(&s->pb, 0) < threshold)
2764         return AVERROR(EINVAL);
2765     return 0;
2766 }
2767
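/*
 * Per-slice encoding worker (one MpegEncContext per slice context): resets
 * the per-slice statistics, DC predictors and motion-vector history, sets up
 * scratch bitstreams for RD mode decision, and then walks the macroblocks of
 * its slice, growing the output buffer on demand.
 */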
2768 static int encode_thread(AVCodecContext *c, void *arg){
2769     MpegEncContext *s= *(void**)arg;
2770     int mb_x, mb_y, mb_y_order;
2771     int chr_h= 16>>s->chroma_y_shift;
2772     int i, j;
2773     MpegEncContext best_s = { 0 }, backup_s;
2774     uint8_t bit_buf[2][MAX_MB_BYTES];
2775     uint8_t bit_buf2[2][MAX_MB_BYTES];
2776     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2777     PutBitContext pb[2], pb2[2], tex_pb[2];
2778
2779     for(i=0; i<2; i++){
2780         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2781         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2782         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2783     }
2784
2785     s->last_bits= put_bits_count(&s->pb);
2786     s->mv_bits=0;
2787     s->misc_bits=0;
2788     s->i_tex_bits=0;
2789     s->p_tex_bits=0;
2790     s->i_count=0;
2791     s->f_count=0;
2792     s->b_count=0;
2793     s->skip_count=0;
2794
2795     for(i=0; i<3; i++){
2796         /* init last dc values */
2797         /* note: quant matrix value (8) is implied here */
2798         s->last_dc[i] = 128 << s->intra_dc_precision;
2799
2800         s->current_picture.encoding_error[i] = 0;
2801     }
2802     if(s->codec_id==AV_CODEC_ID_AMV){
2803         s->last_dc[0] = 128*8/13;
2804         s->last_dc[1] = 128*8/14;
2805         s->last_dc[2] = 128*8/14;
2806     }
2807     s->mb_skip_run = 0;
2808     memset(s->last_mv, 0, sizeof(s->last_mv));
2809
2810     s->last_mv_dir = 0;
2811
2812     switch(s->codec_id){
2813     case AV_CODEC_ID_H263:
2814     case AV_CODEC_ID_H263P:
2815     case AV_CODEC_ID_FLV1:
2816         if (CONFIG_H263_ENCODER)
2817             s->gob_index = H263_GOB_HEIGHT(s->height);
2818         break;
2819     case AV_CODEC_ID_MPEG4:
2820         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2821             ff_mpeg4_init_partitions(s);
2822         break;
2823     }
2824
2825     s->resync_mb_x=0;
2826     s->resync_mb_y=0;
2827     s->first_slice_line = 1;
2828     s->ptr_lastgob = s->pb.buf;
2829     for (mb_y_order = s->start_mb_y; mb_y_order < s->end_mb_y; mb_y_order++) {
2830         if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
2831             int first_in_slice;
2832             mb_y = ff_speedhq_mb_y_order_to_mb(mb_y_order, s->mb_height, &first_in_slice);
2833             if (first_in_slice && mb_y_order != s->start_mb_y)
2834                 ff_speedhq_end_slice(s);
2835             s->last_dc[0] = s->last_dc[1] = s->last_dc[2] = 1024 << s->intra_dc_precision;
2836         } else {
2837             mb_y = mb_y_order;
2838         }
2839         s->mb_x=0;
2840         s->mb_y= mb_y;
2841
2842         ff_set_qscale(s, s->qscale);
2843         ff_init_block_index(s);
2844
2845         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2846             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2847             int mb_type= s->mb_type[xy];
2848 //            int d;
2849             int dmin= INT_MAX;
2850             int dir;
2851             int size_increase =  s->avctx->internal->byte_buffer_size/4
2852                                + s->mb_width*MAX_MB_BYTES;
2853
2854             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2855             if (put_bytes_left(&s->pb, 0) < MAX_MB_BYTES){
2856                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2857                 return -1;
2858             }
2859             if(s->data_partitioning){
2860                 if (put_bytes_left(&s->pb2,    0) < MAX_MB_BYTES ||
2861                     put_bytes_left(&s->tex_pb, 0) < MAX_MB_BYTES) {
2862                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2863                     return -1;
2864                 }
2865             }
2866
2867             s->mb_x = mb_x;
2868             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2869             ff_update_block_index(s);
2870
2871             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2872                 ff_h261_reorder_mb_index(s);
2873                 xy= s->mb_y*s->mb_stride + s->mb_x;
2874                 mb_type= s->mb_type[xy];
2875             }
2876
2877             /* write gob / video packet header  */
2878             if(s->rtp_mode){
2879                 int current_packet_size, is_gob_start;
2880
2881                 current_packet_size = put_bytes_count(&s->pb, 1)
2882                                       - (s->ptr_lastgob - s->pb.buf);
2883
2884                 is_gob_start = s->rtp_payload_size &&
2885                                current_packet_size >= s->rtp_payload_size &&
2886                                mb_y + mb_x > 0;
2887
2888                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2889
2890                 switch(s->codec_id){
2891                 case AV_CODEC_ID_H263:
2892                 case AV_CODEC_ID_H263P:
2893                     if(!s->h263_slice_structured)
2894                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2895                     break;
2896                 case AV_CODEC_ID_MPEG2VIDEO:
2897                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
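                    /* fall through: the mb_skip_run check below applies to MPEG-2 as well */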
2898                 case AV_CODEC_ID_MPEG1VIDEO:
2899                     if(s->mb_skip_run) is_gob_start=0;
2900                     break;
2901                 case AV_CODEC_ID_MJPEG:
2902                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2903                     break;
2904                 }
2905
2906                 if(is_gob_start){
2907                     if(s->start_mb_y != mb_y || mb_x!=0){
2908                         write_slice_end(s);
2909
2910                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2911                             ff_mpeg4_init_partitions(s);
2912                         }
2913                     }
2914
2915                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2916                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2917
2918                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2919                         int r = put_bytes_count(&s->pb, 0) + s->picture_number + 16 + s->mb_x + s->mb_y;
2920                         int d = 100 / s->error_rate;
2921                         if(r % d == 0){
2922                             current_packet_size=0;
2923                             s->pb.buf_ptr= s->ptr_lastgob;
2924                             av_assert1(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2925                         }
2926                     }
2927
2928                     update_mb_info(s, 1);
2929
2930                     switch(s->codec_id){
2931                     case AV_CODEC_ID_MPEG4:
2932                         if (CONFIG_MPEG4_ENCODER) {
2933                             ff_mpeg4_encode_video_packet_header(s);
2934                             ff_mpeg4_clean_buffers(s);
2935                         }
2936                     break;
2937                     case AV_CODEC_ID_MPEG1VIDEO:
2938                     case AV_CODEC_ID_MPEG2VIDEO:
2939                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2940                             ff_mpeg1_encode_slice_header(s);
2941                             ff_mpeg1_clean_buffers(s);
2942                         }
2943                     break;
2944                     case AV_CODEC_ID_H263:
2945                     case AV_CODEC_ID_H263P:
2946                         if (CONFIG_H263_ENCODER)
2947                             ff_h263_encode_gob_header(s, mb_y);
2948                     break;
2949                     }
2950
2951                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2952                         int bits= put_bits_count(&s->pb);
2953                         s->misc_bits+= bits - s->last_bits;
2954                         s->last_bits= bits;
2955                     }
2956
2957                     s->ptr_lastgob += current_packet_size;
2958                     s->first_slice_line=1;
2959                     s->resync_mb_x=mb_x;
2960                     s->resync_mb_y=mb_y;
2961                 }
2962             }
2963
2964             if(  (s->resync_mb_x   == s->mb_x)
2965                && s->resync_mb_y+1 == s->mb_y){
2966                 s->first_slice_line=0;
2967             }
2968
2969             s->mb_skipped=0;
2970             s->dquant=0; //only for QP_RD
2971
2972             update_mb_info(s, 0);
2973
2974             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2975                 int next_block=0;
2976                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2977
2978                 copy_context_before_encode(&backup_s, s, -1);
2979                 backup_s.pb= s->pb;
2980                 best_s.data_partitioning= s->data_partitioning;
2981                 best_s.partitioned_frame= s->partitioned_frame;
2982                 if(s->data_partitioning){
2983                     backup_s.pb2= s->pb2;
2984                     backup_s.tex_pb= s->tex_pb;
2985                 }
2986
2987                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2988                     s->mv_dir = MV_DIR_FORWARD;
2989                     s->mv_type = MV_TYPE_16X16;
2990                     s->mb_intra= 0;
2991                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2992                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2993                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2994                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2995                 }
2996                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2997                     s->mv_dir = MV_DIR_FORWARD;
2998                     s->mv_type = MV_TYPE_FIELD;
2999                     s->mb_intra= 0;
3000                     for(i=0; i<2; i++){
3001                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3002                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3003                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3004                     }
3005                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3006                                  &dmin, &next_block, 0, 0);
3007                 }
3008                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3009                     s->mv_dir = MV_DIR_FORWARD;
3010                     s->mv_type = MV_TYPE_16X16;
3011                     s->mb_intra= 0;
3012                     s->mv[0][0][0] = 0;
3013                     s->mv[0][0][1] = 0;
3014                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3015                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3016                 }
3017                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3018                     s->mv_dir = MV_DIR_FORWARD;
3019                     s->mv_type = MV_TYPE_8X8;
3020                     s->mb_intra= 0;
3021                     for(i=0; i<4; i++){
3022                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3023                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3024                     }
3025                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3026                                  &dmin, &next_block, 0, 0);
3027                 }
3028                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3029                     s->mv_dir = MV_DIR_FORWARD;
3030                     s->mv_type = MV_TYPE_16X16;
3031                     s->mb_intra= 0;
3032                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3033                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3034                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3035                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3036                 }
3037                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3038                     s->mv_dir = MV_DIR_BACKWARD;
3039                     s->mv_type = MV_TYPE_16X16;
3040                     s->mb_intra= 0;
3041                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3042                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3043                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3044                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3045                 }
3046                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3047                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3048                     s->mv_type = MV_TYPE_16X16;
3049                     s->mb_intra= 0;
3050                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3051                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3052                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3053                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3054                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3055                                  &dmin, &next_block, 0, 0);
3056                 }
3057                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3058                     s->mv_dir = MV_DIR_FORWARD;
3059                     s->mv_type = MV_TYPE_FIELD;
3060                     s->mb_intra= 0;
3061                     for(i=0; i<2; i++){
3062                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3063                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3064                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3065                     }
3066                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3067                                  &dmin, &next_block, 0, 0);
3068                 }
3069                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3070                     s->mv_dir = MV_DIR_BACKWARD;
3071                     s->mv_type = MV_TYPE_FIELD;
3072                     s->mb_intra= 0;
3073                     for(i=0; i<2; i++){
3074                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3075                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3076                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3077                     }
3078                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3079                                  &dmin, &next_block, 0, 0);
3080                 }
3081                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3082                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3083                     s->mv_type = MV_TYPE_FIELD;
3084                     s->mb_intra= 0;
3085                     for(dir=0; dir<2; dir++){
3086                         for(i=0; i<2; i++){
3087                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3088                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3089                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3090                         }
3091                     }
3092                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3093                                  &dmin, &next_block, 0, 0);
3094                 }
3095                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3096                     s->mv_dir = 0;
3097                     s->mv_type = MV_TYPE_16X16;
3098                     s->mb_intra= 1;
3099                     s->mv[0][0][0] = 0;
3100                     s->mv[0][0][1] = 0;
3101                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3102                                  &dmin, &next_block, 0, 0);
3103                     if(s->h263_pred || s->h263_aic){
3104                         if(best_s.mb_intra)
3105                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3106                         else
3107                             ff_clean_intra_table_entries(s); //old mode?
3108                     }
3109                 }
3110
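                /* QP_RD: additionally try quantizer deltas of -1, +1, -2, +2
                 * (only +/-2 for B-frames) around the previous qp for the best
                 * mode found so far, keeping whichever qp gives the lowest
                 * rate-distortion cost. */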
3111                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3112                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3113                         const int last_qp= backup_s.qscale;
3114                         int qpi, qp, dc[6];
3115                         int16_t ac[6][16];
3116                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3117                         static const int dquant_tab[4]={-1,1,-2,2};
3118                         int storecoefs = s->mb_intra && s->dc_val[0];
3119
3120                         av_assert2(backup_s.dquant == 0);
3121
3122                         //FIXME intra
3123                         s->mv_dir= best_s.mv_dir;
3124                         s->mv_type = MV_TYPE_16X16;
3125                         s->mb_intra= best_s.mb_intra;
3126                         s->mv[0][0][0] = best_s.mv[0][0][0];
3127                         s->mv[0][0][1] = best_s.mv[0][0][1];
3128                         s->mv[1][0][0] = best_s.mv[1][0][0];
3129                         s->mv[1][0][1] = best_s.mv[1][0][1];
3130
3131                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3132                         for(; qpi<4; qpi++){
3133                             int dquant= dquant_tab[qpi];
3134                             qp= last_qp + dquant;
3135                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3136                                 continue;
3137                             backup_s.dquant= dquant;
3138                             if(storecoefs){
3139                                 for(i=0; i<6; i++){
3140                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3141                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3142                                 }
3143                             }
3144
3145                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3146                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3147                             if(best_s.qscale != qp){
3148                                 if(storecoefs){
3149                                     for(i=0; i<6; i++){
3150                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3151                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3152                                     }
3153                                 }
3154                             }
3155                         }
3156                     }
3157                 }
3158                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3159                     int mx= s->b_direct_mv_table[xy][0];
3160                     int my= s->b_direct_mv_table[xy][1];
3161
3162                     backup_s.dquant = 0;
3163                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3164                     s->mb_intra= 0;
3165                     ff_mpeg4_set_direct_mv(s, mx, my);
3166                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3167                                  &dmin, &next_block, mx, my);
3168                 }
3169                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3170                     backup_s.dquant = 0;
3171                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3172                     s->mb_intra= 0;
3173                     ff_mpeg4_set_direct_mv(s, 0, 0);
3174                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3175                                  &dmin, &next_block, 0, 0);
3176                 }
3177                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3178                     int coded=0;
3179                     for(i=0; i<6; i++)
3180                         coded |= s->block_last_index[i];
3181                     if(coded){
3182                         int mx,my;
3183                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3184                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3185                             mx=my=0; //FIXME find the one we actually used
3186                             ff_mpeg4_set_direct_mv(s, mx, my);
3187                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3188                             mx= s->mv[1][0][0];
3189                             my= s->mv[1][0][1];
3190                         }else{
3191                             mx= s->mv[0][0][0];
3192                             my= s->mv[0][0][1];
3193                         }
3194
3195                         s->mv_dir= best_s.mv_dir;
3196                         s->mv_type = best_s.mv_type;
3197                         s->mb_intra= 0;
3198 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3199                         s->mv[0][0][1] = best_s.mv[0][0][1];
3200                         s->mv[1][0][0] = best_s.mv[1][0][0];
3201                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3202                         backup_s.dquant= 0;
3203                         s->skipdct=1;
3204                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3205                                         &dmin, &next_block, mx, my);
3206                         s->skipdct=0;
3207                     }
3208                 }
3209
3210                 s->current_picture.qscale_table[xy] = best_s.qscale;
3211
3212                 copy_context_after_encode(s, &best_s, -1);
3213
3214                 pb_bits_count= put_bits_count(&s->pb);
3215                 flush_put_bits(&s->pb);
3216                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3217                 s->pb= backup_s.pb;
3218
3219                 if(s->data_partitioning){
3220                     pb2_bits_count= put_bits_count(&s->pb2);
3221                     flush_put_bits(&s->pb2);
3222                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3223                     s->pb2= backup_s.pb2;
3224
3225                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3226                     flush_put_bits(&s->tex_pb);
3227                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3228                     s->tex_pb= backup_s.tex_pb;
3229                 }
3230                 s->last_bits= put_bits_count(&s->pb);
3231
3232                 if (CONFIG_H263_ENCODER &&
3233                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3234                     ff_h263_update_motion_val(s);
3235
3236                 if(next_block==0){ //FIXME 16 vs linesize16
3237                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3238                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3239                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3240                 }
3241
3242                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3243                     ff_mpv_reconstruct_mb(s, s->block);
3244             } else {
3245                 int motion_x = 0, motion_y = 0;
3246                 s->mv_type=MV_TYPE_16X16;
3247                 // only one MB-Type possible
3248
3249                 switch(mb_type){
3250                 case CANDIDATE_MB_TYPE_INTRA:
3251                     s->mv_dir = 0;
3252                     s->mb_intra= 1;
3253                     motion_x= s->mv[0][0][0] = 0;
3254                     motion_y= s->mv[0][0][1] = 0;
3255                     break;
3256                 case CANDIDATE_MB_TYPE_INTER:
3257                     s->mv_dir = MV_DIR_FORWARD;
3258                     s->mb_intra= 0;
3259                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3260                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3261                     break;
3262                 case CANDIDATE_MB_TYPE_INTER_I:
3263                     s->mv_dir = MV_DIR_FORWARD;
3264                     s->mv_type = MV_TYPE_FIELD;
3265                     s->mb_intra= 0;
3266                     for(i=0; i<2; i++){
3267                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3268                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3269                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3270                     }
3271                     break;
3272                 case CANDIDATE_MB_TYPE_INTER4V:
3273                     s->mv_dir = MV_DIR_FORWARD;
3274                     s->mv_type = MV_TYPE_8X8;
3275                     s->mb_intra= 0;
3276                     for(i=0; i<4; i++){
3277                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3278                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3279                     }
3280                     break;
3281                 case CANDIDATE_MB_TYPE_DIRECT:
3282                     if (CONFIG_MPEG4_ENCODER) {
3283                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3284                         s->mb_intra= 0;
3285                         motion_x=s->b_direct_mv_table[xy][0];
3286                         motion_y=s->b_direct_mv_table[xy][1];
3287                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3288                     }
3289                     break;
3290                 case CANDIDATE_MB_TYPE_DIRECT0:
3291                     if (CONFIG_MPEG4_ENCODER) {
3292                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3293                         s->mb_intra= 0;
3294                         ff_mpeg4_set_direct_mv(s, 0, 0);
3295                     }
3296                     break;
3297                 case CANDIDATE_MB_TYPE_BIDIR:
3298                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3299                     s->mb_intra= 0;
3300                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3301                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3302                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3303                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3304                     break;
3305                 case CANDIDATE_MB_TYPE_BACKWARD:
3306                     s->mv_dir = MV_DIR_BACKWARD;
3307                     s->mb_intra= 0;
3308                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3309                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3310                     break;
3311                 case CANDIDATE_MB_TYPE_FORWARD:
3312                     s->mv_dir = MV_DIR_FORWARD;
3313                     s->mb_intra= 0;
3314                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3315                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3316                     break;
3317                 case CANDIDATE_MB_TYPE_FORWARD_I:
3318                     s->mv_dir = MV_DIR_FORWARD;
3319                     s->mv_type = MV_TYPE_FIELD;
3320                     s->mb_intra= 0;
3321                     for(i=0; i<2; i++){
3322                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3323                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3324                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3325                     }
3326                     break;
3327                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3328                     s->mv_dir = MV_DIR_BACKWARD;
3329                     s->mv_type = MV_TYPE_FIELD;
3330                     s->mb_intra= 0;
3331                     for(i=0; i<2; i++){
3332                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3333                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3334                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3335                     }
3336                     break;
3337                 case CANDIDATE_MB_TYPE_BIDIR_I:
3338                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3339                     s->mv_type = MV_TYPE_FIELD;
3340                     s->mb_intra= 0;
3341                     for(dir=0; dir<2; dir++){
3342                         for(i=0; i<2; i++){
3343                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3344                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3345                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3346                         }
3347                     }
3348                     break;
3349                 default:
3350                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3351                 }
3352
3353                 encode_mb(s, motion_x, motion_y);
3354
3355                 // RAL: Update last macroblock type
3356                 s->last_mv_dir = s->mv_dir;
3357
3358                 if (CONFIG_H263_ENCODER &&
3359                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3360                     ff_h263_update_motion_val(s);
3361
3362                 ff_mpv_reconstruct_mb(s, s->block);
3363             }
3364
3365             /* clean the MV table in IPS frames for direct mode in B-frames */
3366             if(s->mb_intra /* && I,P,S_TYPE */){
3367                 s->p_mv_table[xy][0]=0;
3368                 s->p_mv_table[xy][1]=0;
3369             }
3370
3371             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3372                 int w= 16;
3373                 int h= 16;
3374
3375                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3376                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3377
3378                 s->current_picture.encoding_error[0] += sse(
3379                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3380                     s->dest[0], w, h, s->linesize);
3381                 s->current_picture.encoding_error[1] += sse(
3382                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3383                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3384                 s->current_picture.encoding_error[2] += sse(
3385                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3386                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3387             }
3388             if(s->loop_filter){
3389                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3390                     ff_h263_loop_filter(s);
3391             }
3392             ff_dlog(s->avctx, "MB %d %d bits\n",
3393                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3394         }
3395     }
3396
3397     // Not beautiful, but the extension header must be written before flushing, so it has to live here.
3398     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3399         ff_msmpeg4_encode_ext_header(s);
3400
3401     write_slice_end(s);
3402
3403     return 0;
3404 }
3405
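/* Merge per-thread statistics back into the main context after motion
 * estimation and after encoding; each merged field is added into dst and
 * zeroed in src so repeated merges cannot double-count. */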
3406 #define MERGE(field) dst->field += src->field; src->field=0
3407 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3408     MERGE(me.scene_change_score);
3409     MERGE(me.mc_mb_var_sum_temp);
3410     MERGE(me.mb_var_sum_temp);
3411 }
3412
3413 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3414     int i;
3415
3416     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3417     MERGE(dct_count[1]);
3418     MERGE(mv_bits);
3419     MERGE(i_tex_bits);
3420     MERGE(p_tex_bits);
3421     MERGE(i_count);
3422     MERGE(f_count);
3423     MERGE(b_count);
3424     MERGE(skip_count);
3425     MERGE(misc_bits);
3426     MERGE(er.error_count);
3427     MERGE(padding_bug_score);
3428     MERGE(current_picture.encoding_error[0]);
3429     MERGE(current_picture.encoding_error[1]);
3430     MERGE(current_picture.encoding_error[2]);
3431
3432     if (dst->noise_reduction){
3433         for(i=0; i<64; i++){
3434             MERGE(dct_error_sum[0][i]);
3435             MERGE(dct_error_sum[1][i]);
3436         }
3437     }
3438
3439     av_assert1(put_bits_count(&src->pb) % 8 ==0);
3440     av_assert1(put_bits_count(&dst->pb) % 8 ==0);
3441     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3442     flush_put_bits(&dst->pb);
3443 }
3444
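/* Pick the frame-level quantizer: either the externally forced next_lambda or
 * the rate-control estimate. With adaptive quantization, the per-MB qscale
 * table is then cleaned up so it respects the codec's limits on how much
 * qscale may change between macroblocks. */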
3445 static int estimate_qp(MpegEncContext *s, int dry_run){
3446     if (s->next_lambda){
3447         s->current_picture_ptr->f->quality =
3448         s->current_picture.f->quality = s->next_lambda;
3449         if(!dry_run) s->next_lambda= 0;
3450     } else if (!s->fixed_qscale) {
3451         int quality = ff_rate_estimate_qscale(s, dry_run);
3452         s->current_picture_ptr->f->quality =
3453         s->current_picture.f->quality = quality;
3454         if (s->current_picture.f->quality < 0)
3455             return -1;
3456     }
3457
3458     if(s->adaptive_quant){
3459         switch(s->codec_id){
3460         case AV_CODEC_ID_MPEG4:
3461             if (CONFIG_MPEG4_ENCODER)
3462                 ff_clean_mpeg4_qscales(s);
3463             break;
3464         case AV_CODEC_ID_H263:
3465         case AV_CODEC_ID_H263P:
3466         case AV_CODEC_ID_FLV1:
3467             if (CONFIG_H263_ENCODER)
3468                 ff_clean_h263_qscales(s);
3469             break;
3470         default:
3471             ff_init_qscale_tab(s);
3472         }
3473
3474         s->lambda= s->lambda_table[0];
3475         //FIXME broken
3476     }else
3477         s->lambda = s->current_picture.f->quality;
3478     update_qscale(s);
3479     return 0;
3480 }
3481
3482 /* must be called before writing the header */
3483 static void set_frame_distances(MpegEncContext * s){
3484     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3485     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3486
3487     if(s->pict_type==AV_PICTURE_TYPE_B){
3488         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3489         av_assert1(s->pb_time > 0 && s->pb_time < s->pp_time);
3490     }else{
3491         s->pp_time= s->time - s->last_non_b_time;
3492         s->last_non_b_time= s->time;
3493         av_assert1(s->picture_number==0 || s->pp_time > 0);
3494     }
3495 }
3496
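/* Encode one frame: run (pre-)motion estimation across all slice contexts,
 * detect scene changes, choose f_code/b_code and clamp overlong vectors, set
 * up codec-specific quantization matrices (MJPEG/AMV), write the picture
 * header, then run encode_thread() in parallel and merge the per-thread
 * contexts back together. */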
3497 static int encode_picture(MpegEncContext *s, int picture_number)
3498 {
3499     int i, ret;
3500     int bits;
3501     int context_count = s->slice_context_count;
3502
3503     s->picture_number = picture_number;
3504
3505     /* Reset the average MB variance */
3506     s->me.mb_var_sum_temp    =
3507     s->me.mc_mb_var_sum_temp = 0;
3508
3509     /* we need to initialize some time vars before we can encode B-frames */
3510     // RAL: Condition added for MPEG1VIDEO
3511     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3512         set_frame_distances(s);
3513     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3514         ff_set_mpeg4_time(s);
3515
3516     s->me.scene_change_score=0;
3517
3518 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3519
3520     if(s->pict_type==AV_PICTURE_TYPE_I){
3521         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3522         else                        s->no_rounding=0;
3523     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3524         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3525             s->no_rounding ^= 1;
3526     }
3527
3528     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3529         if (estimate_qp(s,1) < 0)
3530             return -1;
3531         ff_get_2pass_fcode(s);
3532     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3533         if(s->pict_type==AV_PICTURE_TYPE_B)
3534             s->lambda= s->last_lambda_for[s->pict_type];
3535         else
3536             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3537         update_qscale(s);
3538     }
3539
3540     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3541         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3542         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3543         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3544         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3545     }
3546
3547     s->mb_intra=0; //for the rate distortion & bit compare functions
3548     for(i=1; i<context_count; i++){
3549         ret = ff_update_duplicate_context(s->thread_context[i], s);
3550         if (ret < 0)
3551             return ret;
3552     }
3553
3554     if(ff_init_me(s)<0)
3555         return -1;
3556
3557     /* Estimate motion for every MB */
3558     if(s->pict_type != AV_PICTURE_TYPE_I){
3559         s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
3560         s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
3561         if (s->pict_type != AV_PICTURE_TYPE_B) {
3562             if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
3563                 s->me_pre == 2) {
3564                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3565             }
3566         }
3567
3568         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3569     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3570         /* I-Frame */
3571         for(i=0; i<s->mb_stride*s->mb_height; i++)
3572             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3573
3574         if(!s->fixed_qscale){
3575             /* finding spatial complexity for I-frame rate control */
3576             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3577         }
3578     }
3579     for(i=1; i<context_count; i++){
3580         merge_context_after_me(s, s->thread_context[i]);
3581     }
3582     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3583     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3584     emms_c();
3585
3586     if (s->me.scene_change_score > s->scenechange_threshold &&
3587         s->pict_type == AV_PICTURE_TYPE_P) {
3588         s->pict_type= AV_PICTURE_TYPE_I;
3589         for(i=0; i<s->mb_stride*s->mb_height; i++)
3590             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3591         if(s->msmpeg4_version >= 3)
3592             s->no_rounding=1;
3593         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3594                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3595     }
3596
3597     if(!s->umvplus){
3598         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3599             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3600
3601             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3602                 int a,b;
3603                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3604                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3605                 s->f_code= FFMAX3(s->f_code, a, b);
3606             }
3607
3608             ff_fix_long_p_mvs(s, s->intra_penalty ? CANDIDATE_MB_TYPE_INTER : CANDIDATE_MB_TYPE_INTRA);
3609             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, !!s->intra_penalty);
3610             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3611                 int j;
3612                 for(i=0; i<2; i++){
3613                     for(j=0; j<2; j++)
3614                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3615                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, !!s->intra_penalty);
3616                 }
3617             }
3618         }
3619
3620         if(s->pict_type==AV_PICTURE_TYPE_B){
3621             int a, b;
3622
3623             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3624             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3625             s->f_code = FFMAX(a, b);
3626
3627             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3628             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3629             s->b_code = FFMAX(a, b);
3630
3631             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3632             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3633             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3634             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3635             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3636                 int dir, j;
3637                 for(dir=0; dir<2; dir++){
3638                     for(i=0; i<2; i++){
3639                         for(j=0; j<2; j++){
3640                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3641                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3642                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3643                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3644                         }
3645                     }
3646                 }
3647             }
3648         }
3649     }
3650
3651     if (estimate_qp(s, 0) < 0)
3652         return -1;
3653
3654     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3655         s->pict_type == AV_PICTURE_TYPE_I &&
3656         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3657         s->qscale= 3; //reduce clipping problems
3658
3659     if (s->out_format == FMT_MJPEG) {
3660         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3661         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3662
3663         if (s->avctx->intra_matrix) {
3664             chroma_matrix =
3665             luma_matrix = s->avctx->intra_matrix;
3666         }
3667         if (s->avctx->chroma_intra_matrix)
3668             chroma_matrix = s->avctx->chroma_intra_matrix;
3669
3670         /* for mjpeg, we do include qscale in the matrix */
3671         for(i=1;i<64;i++){
3672             int j = s->idsp.idct_permutation[i];
3673
3674             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3675             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3676         }
3677         s->y_dc_scale_table=
3678         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3679         s->chroma_intra_matrix[0] =
3680         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3681         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3682                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3683         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3684                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3685         s->qscale= 8;
3686     }
3687     if(s->codec_id == AV_CODEC_ID_AMV){
3688         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3689         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3690         for(i=1;i<64;i++){
3691             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3692
3693             s->intra_matrix[j]        = sp5x_qscale_five_quant_table[0][i];
3694             s->chroma_intra_matrix[j] = sp5x_qscale_five_quant_table[1][i];
3695         }
3696         s->y_dc_scale_table= y;
3697         s->c_dc_scale_table= c;
3698         s->intra_matrix[0] = 13;
3699         s->chroma_intra_matrix[0] = 14;
3700         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3701                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3702         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3703                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3704         s->qscale= 8;
3705     }
3706
3707     if (s->out_format == FMT_SPEEDHQ) {
3708         s->y_dc_scale_table=
3709         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[3];
3710     }
3711
3712     //FIXME var duplication
3713     s->current_picture_ptr->f->key_frame =
3714     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3715     s->current_picture_ptr->f->pict_type =
3716     s->current_picture.f->pict_type = s->pict_type;
3717
3718     if (s->current_picture.f->key_frame)
3719         s->picture_in_gop_number=0;
3720
3721     s->mb_x = s->mb_y = 0;
3722     s->last_bits= put_bits_count(&s->pb);
3723     switch(s->out_format) {
3724 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
3725     case FMT_MJPEG:
3726         /* s->huffman == HUFFMAN_TABLE_OPTIMAL can only be true for MJPEG. */
3727         if (!CONFIG_MJPEG_ENCODER || s->huffman != HUFFMAN_TABLE_OPTIMAL)
3728             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3729                                            s->pred, s->intra_matrix, s->chroma_intra_matrix);
3730         break;
3731 #endif
3732     case FMT_SPEEDHQ:
3733         if (CONFIG_SPEEDHQ_ENCODER)
3734             ff_speedhq_encode_picture_header(s);
3735         break;
3736     case FMT_H261:
3737         if (CONFIG_H261_ENCODER)
3738             ff_h261_encode_picture_header(s, picture_number);
3739         break;
3740     case FMT_H263:
3741         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3742             ff_wmv2_encode_picture_header(s, picture_number);
3743         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3744             ff_msmpeg4_encode_picture_header(s, picture_number);
3745         else if (CONFIG_MPEG4_ENCODER && s->h263_pred) {
3746             ret = ff_mpeg4_encode_picture_header(s, picture_number);
3747             if (ret < 0)
3748                 return ret;
3749         } else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3750             ret = ff_rv10_encode_picture_header(s, picture_number);
3751             if (ret < 0)
3752                 return ret;
3753         }
3754         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3755             ff_rv20_encode_picture_header(s, picture_number);
3756         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3757             ff_flv_encode_picture_header(s, picture_number);
3758         else if (CONFIG_H263_ENCODER)
3759             ff_h263_encode_picture_header(s, picture_number);
3760         break;
3761     case FMT_MPEG1:
3762         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3763             ff_mpeg1_encode_picture_header(s, picture_number);
3764         break;
3765     default:
3766         av_assert0(0);
3767     }
3768     bits= put_bits_count(&s->pb);
3769     s->header_bits= bits - s->last_bits;
3770
3771     for(i=1; i<context_count; i++){
3772         update_duplicate_context_after_me(s->thread_context[i], s);
3773     }
3774     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3775     for(i=1; i<context_count; i++){
3776         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3777             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-BUF_BITS));
3778         merge_context_after_encode(s, s->thread_context[i]);
3779     }
3780     emms_c();
3781     return 0;
3782 }
3783
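/* DCT-domain noise reduction: accumulate the magnitude of each coefficient in
 * dct_error_sum and pull every nonzero coefficient towards zero by the
 * adaptive per-coefficient dct_offset, clamping at zero so the sign never
 * flips. */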
3784 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3785     const int intra= s->mb_intra;
3786     int i;
3787
3788     s->dct_count[intra]++;
3789
3790     for(i=0; i<64; i++){
3791         int level= block[i];
3792
3793         if(level){
3794             if(level>0){
3795                 s->dct_error_sum[intra][i] += level;
3796                 level -= s->dct_offset[intra][i];
3797                 if(level<0) level=0;
3798             }else{
3799                 s->dct_error_sum[intra][i] -= level;
3800                 level += s->dct_offset[intra][i];
3801                 if(level>0) level=0;
3802             }
3803             block[i]= level;
3804         }
3805     }
3806 }
3807
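/* Trellis quantization: for each scan position keep up to two candidate
 * levels, then run a Viterbi-style search over (run, level) decisions that
 * minimizes distortion + lambda * rate, pruning paths via the survivor list.
 * The position of the last coded coefficient is optimized as part of the same
 * search. */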
3808 static int dct_quantize_trellis_c(MpegEncContext *s,
3809                                   int16_t *block, int n,
3810                                   int qscale, int *overflow){
3811     const int *qmat;
3812     const uint16_t *matrix;
3813     const uint8_t *scantable;
3814     const uint8_t *perm_scantable;
3815     int max=0;
3816     unsigned int threshold1, threshold2;
3817     int bias=0;
3818     int run_tab[65];
3819     int level_tab[65];
3820     int score_tab[65];
3821     int survivor[65];
3822     int survivor_count;
3823     int last_run=0;
3824     int last_level=0;
3825     int last_score= 0;
3826     int last_i;
3827     int coeff[2][64];
3828     int coeff_count[64];
3829     int qmul, qadd, start_i, last_non_zero, i, dc;
3830     const int esc_length= s->ac_esc_length;
3831     uint8_t * length;
3832     uint8_t * last_length;
3833     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3834     int mpeg2_qscale;
3835
3836     s->fdsp.fdct(block);
3837
3838     if(s->dct_error_sum)
3839         s->denoise_dct(s, block);
3840     qmul= qscale*16;
3841     qadd= ((qscale-1)|1)*8;
3842
3843     if (s->q_scale_type) mpeg2_qscale = ff_mpeg2_non_linear_qscale[qscale];
3844     else                 mpeg2_qscale = qscale << 1;
3845
3846     if (s->mb_intra) {
3847         int q;
3848         scantable= s->intra_scantable.scantable;
3849         perm_scantable= s->intra_scantable.permutated;
3850         if (!s->h263_aic) {
3851             if (n < 4)
3852                 q = s->y_dc_scale;
3853             else
3854                 q = s->c_dc_scale;
3855             q = q << 3;
3856         } else{
3857             /* For AIC we skip quant/dequant of INTRADC */
3858             q = 1 << 3;
3859             qadd=0;
3860         }
3861
3862         /* note: block[0] is assumed to be positive */
3863         block[0] = (block[0] + (q >> 1)) / q;
3864         start_i = 1;
3865         last_non_zero = 0;
3866         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3867         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3868         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3869             bias= 1<<(QMAT_SHIFT-1);
3870
3871         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3872             length     = s->intra_chroma_ac_vlc_length;
3873             last_length= s->intra_chroma_ac_vlc_last_length;
3874         } else {
3875             length     = s->intra_ac_vlc_length;
3876             last_length= s->intra_ac_vlc_last_length;
3877         }
3878     } else {
3879         scantable= s->inter_scantable.scantable;
3880         perm_scantable= s->inter_scantable.permutated;
3881         start_i = 0;
3882         last_non_zero = -1;
3883         qmat = s->q_inter_matrix[qscale];
3884         matrix = s->inter_matrix;
3885         length     = s->inter_ac_vlc_length;
3886         last_length= s->inter_ac_vlc_last_length;
3887     }
3888     last_i= start_i;
3889
3890     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3891     threshold2= (threshold1<<1);
3892
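    /* Find the last coefficient that survives the dead-zone test: a level is
     * kept only if |block[j] * qmat[j]| exceeds threshold1; threshold2 equals
     * 2 * threshold1, which allows a single branch-free unsigned comparison. */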
3893     for(i=63; i>=start_i; i--) {
3894         const int j = scantable[i];
3895         int level = block[j] * qmat[j];
3896
3897         if(((unsigned)(level+threshold1))>threshold2){
3898             last_non_zero = i;
3899             break;
3900         }
3901     }
3902
3903     for(i=start_i; i<=last_non_zero; i++) {
3904         const int j = scantable[i];
3905         int level = block[j] * qmat[j];
3906
3907 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3908 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3909         if(((unsigned)(level+threshold1))>threshold2){
3910             if(level>0){
3911                 level= (bias + level)>>QMAT_SHIFT;
3912                 coeff[0][i]= level;
3913                 coeff[1][i]= level-1;
3914 //                coeff[2][k]= level-2;
3915             }else{
3916                 level= (bias - level)>>QMAT_SHIFT;
3917                 coeff[0][i]= -level;
3918                 coeff[1][i]= -level+1;
3919 //                coeff[2][k]= -level+2;
3920             }
3921             coeff_count[i]= FFMIN(level, 2);
3922             av_assert2(coeff_count[i]);
3923             max |=level;
3924         }else{
3925             coeff[0][i]= (level>>31)|1;
3926             coeff_count[i]= 1;
3927         }
3928     }
3929
3930     *overflow= s->max_qcoeff < max; //overflow might have happened
3931
3932     if(last_non_zero < start_i){
3933         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3934         return last_non_zero;
3935     }
3936
3937     score_tab[start_i]= 0;
3938     survivor[0]= start_i;
3939     survivor_count= 1;
3940
3941     for(i=start_i; i<=last_non_zero; i++){
3942         int level_index, j, zero_distortion;
3943         int dct_coeff= FFABS(block[ scantable[i] ]);
3944         int best_score=256*256*256*120;
3945
3946         if (s->fdsp.fdct == ff_fdct_ifast)
3947             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3948         zero_distortion= dct_coeff*dct_coeff;
3949
3950         for(level_index=0; level_index < coeff_count[i]; level_index++){
3951             int distortion;
3952             int level= coeff[level_index][i];
3953             const int alevel= FFABS(level);
3954             int unquant_coeff;
3955
3956             av_assert2(level);
3957
3958             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3959                 unquant_coeff= alevel*qmul + qadd;
3960             } else if(s->out_format == FMT_MJPEG) {
3961                 j = s->idsp.idct_permutation[scantable[i]];
3962                 unquant_coeff = alevel * matrix[j] * 8;
3963             }else{ // MPEG-1
3964                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3965                 if(s->mb_intra){
3966                         unquant_coeff = (int)(  alevel  * mpeg2_qscale * matrix[j]) >> 4;
3967                         unquant_coeff =   (unquant_coeff - 1) | 1;
3968                 }else{
3969                         unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[j])) >> 5;
3970                         unquant_coeff =   (unquant_coeff - 1) | 1;
3971                 }
3972                 unquant_coeff<<= 3;
3973             }
3974
3975             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3976             level+=64;
3977             if((level&(~127)) == 0){
3978                 for(j=survivor_count-1; j>=0; j--){
3979                     int run= i - survivor[j];
3980                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3981                     score += score_tab[i-run];
3982
3983                     if(score < best_score){
3984                         best_score= score;
3985                         run_tab[i+1]= run;
3986                         level_tab[i+1]= level-64;
3987                     }
3988                 }
3989
3990                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3991                     for(j=survivor_count-1; j>=0; j--){
3992                         int run= i - survivor[j];
3993                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3994                         score += score_tab[i-run];
3995                         if(score < last_score){
3996                             last_score= score;
3997                             last_run= run;
3998                             last_level= level-64;
3999                             last_i= i+1;
4000                         }
4001                     }
4002                 }
4003             }else{
4004                 distortion += esc_length*lambda;
4005                 for(j=survivor_count-1; j>=0; j--){
4006                     int run= i - survivor[j];
4007                     int score= distortion + score_tab[i-run];
4008
4009                     if(score < best_score){
4010                         best_score= score;
4011                         run_tab[i+1]= run;
4012                         level_tab[i+1]= level-64;
4013                     }
4014                 }
4015
4016                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4017                     for(j=survivor_count-1; j>=0; j--){
4018                         int run= i - survivor[j];
4019                         int score= distortion + score_tab[i-run];
4020                         if(score < last_score){
4021                             last_score= score;
4022                             last_run= run;
4023                             last_level= level-64;
4024                             last_i= i+1;
4025                         }
4026                     }
4027                 }
4028             }
4029         }
4030
4031         score_tab[i+1]= best_score;
4032
4033         // Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
4034         if(last_non_zero <= 27){
4035             for(; survivor_count; survivor_count--){
4036                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4037                     break;
4038             }
4039         }else{
4040             for(; survivor_count; survivor_count--){
4041                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4042                     break;
4043             }
4044         }
4045
4046         survivor[ survivor_count++ ]= i+1;
4047     }
4048
4049     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4050         last_score= 256*256*256*120;
4051         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4052             int score= score_tab[i];
4053             if (i)
4054                 score += lambda * 2; // FIXME more exact?
4055
4056             if(score < last_score){
4057                 last_score= score;
4058                 last_i= i;
4059                 last_level= level_tab[i];
4060                 last_run= run_tab[i];
4061             }
4062         }
4063     }
4064
4065     s->coded_score[n] = last_score;
4066
4067     dc= FFABS(block[0]);
4068     last_non_zero= last_i - 1;
4069     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4070
4071     if(last_non_zero < start_i)
4072         return last_non_zero;
4073
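         /*
          * Special case: only the very first coefficient of an inter block is
          * non-zero. Try each candidate level for it against simply dropping
          * the coefficient (cost dc*dc) and return -1 if dropping wins.
          */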
4074     if(last_non_zero == 0 && start_i == 0){
4075         int best_level= 0;
4076         int best_score= dc * dc;
4077
4078         for(i=0; i<coeff_count[0]; i++){
4079             int level= coeff[i][0];
4080             int alevel= FFABS(level);
4081             int unquant_coeff, score, distortion;
4082
4083             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4084                     unquant_coeff= (alevel*qmul + qadd)>>3;
4085             } else{ // MPEG-1
4086                     unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[0])) >> 5;
4087                     unquant_coeff =   (unquant_coeff - 1) | 1;
4088             }
4089             unquant_coeff = (unquant_coeff + 4) >> 3;
4090             unquant_coeff<<= 3 + 3;
4091
4092             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4093             level+=64;
4094             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4095             else                    score= distortion + esc_length*lambda;
4096
4097             if(score < best_score){
4098                 best_score= score;
4099                 best_level= level - 64;
4100             }
4101         }
4102         block[0]= best_level;
4103         s->coded_score[n] = best_score - dc*dc;
4104         if(best_level == 0) return -1;
4105         else                return last_non_zero;
4106     }
4107
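         /* Backtrack the winning trellis path: place the chosen last level,
          * then walk backwards through run_tab[]/level_tab[] to write the
          * remaining non-zero levels into the (permuted) block. */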
4108     i= last_i;
4109     av_assert2(last_level);
4110
4111     block[ perm_scantable[last_non_zero] ]= last_level;
4112     i -= last_run + 1;
4113
4114     for(; i>start_i; i -= run_tab[i] + 1){
4115         block[ perm_scantable[i-1] ]= level_tab[i];
4116     }
4117
4118     return last_non_zero;
4119 }
4120
4121 static int16_t basis[64][64];
4122
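     /*
      * Fill basis[] with the 64 two-dimensional 8x8 DCT basis functions in
      * fixed point, stored under the IDCT permutation perm so they can be
      * indexed with permuted coefficient positions. Each entry is
      *
      *     basis[perm[8*i + j]][8*x + y] =
      *         round(2^BASIS_SHIFT * 0.25 * C(i) * C(j) *
      *               cos((2*x + 1)*i*pi/16) * cos((2*y + 1)*j*pi/16))
      *
      * with C(0) = sqrt(0.5) and C(k) = 1 otherwise. dct_quantize_refine()
      * uses these tables (via try_8x8basis/add_8x8basis) to measure how
      * changing a single coefficient alters the spatial reconstruction error.
      */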
4123 static void build_basis(uint8_t *perm){
4124     int i, j, x, y;
4125     emms_c();
4126     for(i=0; i<8; i++){
4127         for(j=0; j<8; j++){
4128             for(y=0; y<8; y++){
4129                 for(x=0; x<8; x++){
4130                     double s= 0.25*(1<<BASIS_SHIFT);
4131                     int index= 8*i + j;
4132                     int perm_index= perm[index];
4133                     if(i==0) s*= sqrt(0.5);
4134                     if(j==0) s*= sqrt(0.5);
4135                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4136                 }
4137             }
4138         }
4139     }
4140 }
4141
4142 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4143                         int16_t *block, int16_t *weight, int16_t *orig,
4144                         int n, int qscale){
4145     int16_t rem[64];
4146     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4147     const uint8_t *scantable;
4148     const uint8_t *perm_scantable;
4149 //    unsigned int threshold1, threshold2;
4150 //    int bias=0;
4151     int run_tab[65];
4152     int prev_run=0;
4153     int prev_level=0;
4154     int qmul, qadd, start_i, last_non_zero, i, dc;
4155     uint8_t * length;
4156     uint8_t * last_length;
4157     int lambda;
4158     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4159
4160     if(basis[0][0] == 0)
4161         build_basis(s->idsp.idct_permutation);
4162
4163     qmul= qscale*2;
4164     qadd= (qscale-1)|1;
4165     if (s->mb_intra) {
4166         scantable= s->intra_scantable.scantable;
4167         perm_scantable= s->intra_scantable.permutated;
4168         if (!s->h263_aic) {
4169             if (n < 4)
4170                 q = s->y_dc_scale;
4171             else
4172                 q = s->c_dc_scale;
4173         } else{
4174             /* For AIC we skip quant/dequant of INTRADC */
4175             q = 1;
4176             qadd=0;
4177         }
4178         q <<= RECON_SHIFT-3;
4179         /* note: block[0] is assumed to be positive */
4180         dc= block[0]*q;
4181 //        block[0] = (block[0] + (q >> 1)) / q;
4182         start_i = 1;
4183 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4184 //            bias= 1<<(QMAT_SHIFT-1);
4185         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4186             length     = s->intra_chroma_ac_vlc_length;
4187             last_length= s->intra_chroma_ac_vlc_last_length;
4188         } else {
4189             length     = s->intra_ac_vlc_length;
4190             last_length= s->intra_ac_vlc_last_length;
4191         }
4192     } else {
4193         scantable= s->inter_scantable.scantable;
4194         perm_scantable= s->inter_scantable.permutated;
4195         dc= 0;
4196         start_i = 0;
4197         length     = s->inter_ac_vlc_length;
4198         last_length= s->inter_ac_vlc_last_length;
4199     }
4200     last_non_zero = s->block_last_index[n];
4201
4202     dc += (1<<(RECON_SHIFT-1));
4203     for(i=0; i<64; i++){
4204         rem[i] = dc - (orig[i] << RECON_SHIFT); // FIXME use orig directly instead of copying to rem[]
4205     }
4206
4207     sum=0;
4208     for(i=0; i<64; i++){
4209         int one= 36;
4210         int qns=4;
4211         int w;
4212
4213         w= FFABS(weight[i]) + qns*one;
4214         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4215
4216         weight[i] = w;
4217 //        w=weight[i] = (63*qns + (w/2)) / w;
4218
4219         av_assert2(w>0);
4220         av_assert2(w<(1<<6));
4221         sum += w*w;
4222     }
4223     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4224
4225     run=0;
4226     rle_index=0;
4227     for(i=start_i; i<=last_non_zero; i++){
4228         int j= perm_scantable[i];
4229         const int level= block[j];
4230         int coeff;
4231
4232         if(level){
4233             if(level<0) coeff= qmul*level - qadd;
4234             else        coeff= qmul*level + qadd;
4235             run_tab[rle_index++]=run;
4236             run=0;
4237
4238             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4239         }else{
4240             run++;
4241         }
4242     }
4243
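             /*
              * Greedy refinement: each pass tries changing every coefficient
              * by +/-1 (which may also create or remove a non-zero
              * coefficient), scores the rate change with the VLC length tables
              * and the distortion change with try_8x8basis() on the weighted
              * residual, and applies the single best improving change. The
              * loop ends when no change beats the current score.
              */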
4244     for(;;){
4245         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4246         int best_coeff=0;
4247         int best_change=0;
4248         int run2, best_unquant_change=0, analyze_gradient;
4249         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4250
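             /*
              * Compute the weighted reconstruction error and DCT it into d1[].
              * The sign of d1[] is used further down to cheaply reject turning
              * on a coefficient whose sign matches the remaining error in that
              * frequency (it would grow the residual instead of shrinking it).
              */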
4251         if(analyze_gradient){
4252             for(i=0; i<64; i++){
4253                 int w= weight[i];
4254
4255                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4256             }
4257             s->fdsp.fdct(d1);
4258         }
4259
4260         if(start_i){
4261             const int level= block[0];
4262             int change, old_coeff;
4263
4264             av_assert2(s->mb_intra);
4265
4266             old_coeff= q*level;
4267
4268             for(change=-1; change<=1; change+=2){
4269                 int new_level= level + change;
4270                 int score, new_coeff;
4271
4272                 new_coeff= q*new_level;
4273                 if(new_coeff >= 2048 || new_coeff < 0)
4274                     continue;
4275
4276                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4277                                                   new_coeff - old_coeff);
4278                 if(score<best_score){
4279                     best_score= score;
4280                     best_coeff= 0;
4281                     best_change= change;
4282                     best_unquant_change= new_coeff - old_coeff;
4283                 }
4284             }
4285         }
4286
4287         run=0;
4288         rle_index=0;
4289         run2= run_tab[rle_index++];
4290         prev_level=0;
4291         prev_run=0;
4292
4293         for(i=start_i; i<64; i++){
4294             int j= perm_scantable[i];
4295             const int level= block[j];
4296             int change, old_coeff;
4297
4298             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4299                 break;
4300
4301             if(level){
4302                 if(level<0) old_coeff= qmul*level - qadd;
4303                 else        old_coeff= qmul*level + qadd;
4304                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4305             }else{
4306                 old_coeff=0;
4307                 run2--;
4308                 av_assert2(run2>=0 || i >= last_non_zero );
4309             }
4310
4311             for(change=-1; change<=1; change+=2){
4312                 int new_level= level + change;
4313                 int score, new_coeff, unquant_change;
4314
4315                 score=0;
4316                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4317                    continue;
4318
4319                 if(new_level){
4320                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4321                     else            new_coeff= qmul*new_level + qadd;
4322                     if(new_coeff >= 2048 || new_coeff <= -2048)
4323                         continue;
4324                     //FIXME check for overflow
4325
4326                     if(level){
4327                         if(level < 63 && level > -63){
4328                             if(i < last_non_zero)
4329                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4330                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4331                             else
4332                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4333                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4334                         }
4335                     }else{
4336                         av_assert2(FFABS(new_level)==1);
4337
4338                         if(analyze_gradient){
4339                             int g= d1[ scantable[i] ];
4340                             if(g && (g^new_level) >= 0)
4341                                 continue;
4342                         }
4343
4344                         if(i < last_non_zero){
4345                             int next_i= i + run2 + 1;
4346                             int next_level= block[ perm_scantable[next_i] ] + 64;
4347
4348                             if(next_level&(~127))
4349                                 next_level= 0;
4350
4351                             if(next_i < last_non_zero)
4352                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4353                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4354                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4355                             else
4356                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4357                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4358                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4359                         }else{
4360                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4361                             if(prev_level){
4362                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4363                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4364                             }
4365                         }
4366                     }
4367                 }else{
4368                     new_coeff=0;
4369                     av_assert2(FFABS(level)==1);
4370
4371                     if(i < last_non_zero){
4372                         int next_i= i + run2 + 1;
4373                         int next_level= block[ perm_scantable[next_i] ] + 64;
4374
4375                         if(next_level&(~127))
4376                             next_level= 0;
4377
4378                         if(next_i < last_non_zero)
4379                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4380                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4381                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4382                         else
4383                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4384                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4385                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4386                     }else{
4387                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4388                         if(prev_level){
4389                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4390                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4391                         }
4392                     }
4393                 }
4394
4395                 score *= lambda;
4396
4397                 unquant_change= new_coeff - old_coeff;
4398                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4399
4400                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4401                                                    unquant_change);
4402                 if(score<best_score){
4403                     best_score= score;
4404                     best_coeff= i;
4405                     best_change= change;
4406                     best_unquant_change= unquant_change;
4407                 }
4408             }
4409             if(level){
4410                 prev_level= level + 64;
4411                 if(prev_level&(~127))
4412                     prev_level= 0;
4413                 prev_run= run;
4414                 run=0;
4415             }else{
4416                 run++;
4417             }
4418         }
4419
4420         if(best_change){
4421             int j= perm_scantable[ best_coeff ];
4422
4423             block[j] += best_change;
4424
4425             if(best_coeff > last_non_zero){
4426                 last_non_zero= best_coeff;
4427                 av_assert2(block[j]);
4428             }else{
4429                 for(; last_non_zero>=start_i; last_non_zero--){
4430                     if(block[perm_scantable[last_non_zero]])
4431                         break;
4432                 }
4433             }
4434
4435             run=0;
4436             rle_index=0;
4437             for(i=start_i; i<=last_non_zero; i++){
4438                 int j= perm_scantable[i];
4439                 const int level= block[j];
4440
4441                 if(level){
4442                     run_tab[rle_index++]=run;
4443                     run=0;
4444                 }else{
4445                     run++;
4446                 }
4447             }
4448
4449             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4450         }else{
4451             break;
4452         }
4453     }
4454
4455     return last_non_zero;
4456 }
4457
4458 /**
4459  * Permute an 8x8 block according to permutation.
4460  * @param block the block which will be permuted according to
4461  *              the given permutation vector
4462  * @param permutation the permutation vector
4463  * @param last the last non-zero coefficient in scantable order, used to
4464  *             speed up the permutation
4465  * @param scantable the scantable that was used; this is only used to speed
4466  *                  up the permutation, the block is not (inverse) permuted
4467  *                  to scantable order!
4468  */
4469 void ff_block_permute(int16_t *block, uint8_t *permutation,
4470                       const uint8_t *scantable, int last)
4471 {
4472     int i;
4473     int16_t temp[64];
4474
4475     if (last <= 0)
4476         return;
4477     //FIXME it is ok but not clean and might fail for some permutations
4478     // if (permutation[1] == 1)
4479     // return;
4480
4481     for (i = 0; i <= last; i++) {
4482         const int j = scantable[i];
4483         temp[j] = block[j];
4484         block[j] = 0;
4485     }
4486
4487     for (i = 0; i <= last; i++) {
4488         const int j = scantable[i];
4489         const int perm_j = permutation[j];
4490         block[perm_j] = temp[j];
4491     }
4492 }
4493
4494 int ff_dct_quantize_c(MpegEncContext *s,
4495                         int16_t *block, int n,
4496                         int qscale, int *overflow)
4497 {
4498     int i, j, level, last_non_zero, q, start_i;
4499     const int *qmat;
4500     const uint8_t *scantable;
4501     int bias;
4502     int max=0;
4503     unsigned int threshold1, threshold2;
4504
4505     s->fdsp.fdct(block);
4506
4507     if(s->dct_error_sum)
4508         s->denoise_dct(s, block);
4509
4510     if (s->mb_intra) {
4511         scantable= s->intra_scantable.scantable;
4512         if (!s->h263_aic) {
4513             if (n < 4)
4514                 q = s->y_dc_scale;
4515             else
4516                 q = s->c_dc_scale;
4517             q = q << 3;
4518         } else
4519             /* For AIC we skip quant/dequant of INTRADC */
4520             q = 1 << 3;
4521
4522         /* note: block[0] is assumed to be positive */
4523         block[0] = (block[0] + (q >> 1)) / q;
4524         start_i = 1;
4525         last_non_zero = 0;
4526         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4527         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4528     } else {
4529         scantable= s->inter_scantable.scantable;
4530         start_i = 0;
4531         last_non_zero = -1;
4532         qmat = s->q_inter_matrix[qscale];
4533         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4534     }
4535     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4536     threshold2= (threshold1<<1);
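         /*
          * The unsigned compare below is a branch-free dead-zone test:
          * (unsigned)(level + threshold1) > threshold2 holds exactly when
          * level < -threshold1 or level > threshold1, i.e. when the scaled
          * coefficient is big enough to quantize to a non-zero value.
          * For example, with QMAT_SHIFT = 21 and bias = 0, threshold1 is
          * 2^21 - 1, so any |block[j] * qmat[j]| below 2^21 is zeroed.
          */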
4537     for(i=63;i>=start_i;i--) {
4538         j = scantable[i];
4539         level = block[j] * qmat[j];
4540
4541         if(((unsigned)(level+threshold1))>threshold2){
4542             last_non_zero = i;
4543             break;
4544         }else{
4545             block[j]=0;
4546         }
4547     }
4548     for(i=start_i; i<=last_non_zero; i++) {
4549         j = scantable[i];
4550         level = block[j] * qmat[j];
4551
4552 //        if(   bias+level >= (1<<QMAT_SHIFT)
4553 //           || bias-level >= (1<<QMAT_SHIFT)){
4554         if(((unsigned)(level+threshold1))>threshold2){
4555             if(level>0){
4556                 level= (bias + level)>>QMAT_SHIFT;
4557                 block[j]= level;
4558             }else{
4559                 level= (bias - level)>>QMAT_SHIFT;
4560                 block[j]= -level;
4561             }
4562             max |=level;
4563         }else{
4564             block[j]=0;
4565         }
4566     }
4567     *overflow= s->max_qcoeff < max; //overflow might have happened
4568
4569     /* We need this permutation so that the coefficients are in the order the IDCT expects; only the non-zero elements are permuted. */
4570     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4571         ff_block_permute(block, s->idsp.idct_permutation,
4572                       scantable, last_non_zero);
4573
4574     return last_non_zero;
4575 }
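
     /*
      * Illustrative call sketch (a rough outline, not copied from this file):
      * for one block the encoder does approximately
      *
      *     int overflow;
      *     s->block_last_index[n] = s->dct_quantize(s, s->block[n], n,
      *                                              s->qscale, &overflow);
      *     if (overflow)
      *         clip_coeffs(s, s->block[n], s->block_last_index[n]);
      *
      * i.e. quantize, remember the index of the last non-zero coefficient,
      * and clamp the levels if *overflow reports that s->max_qcoeff was
      * exceeded.
      */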
4576
4577 #define OFFSET(x) offsetof(MpegEncContext, x)
4578 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4579 static const AVOption h263_options[] = {
4580     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4581     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size.", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4582     FF_MPV_COMMON_OPTS
4583 #if FF_API_MPEGVIDEO_OPTS
4584     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
4585     FF_MPV_DEPRECATED_A53_CC_OPT
4586     FF_MPV_DEPRECATED_MATRIX_OPT
4587     FF_MPV_DEPRECATED_BFRAME_OPTS
4588 #endif
4589     { NULL },
4590 };
4591
4592 static const AVClass h263_class = {
4593     .class_name = "H.263 encoder",
4594     .item_name  = av_default_item_name,
4595     .option     = h263_options,
4596     .version    = LIBAVUTIL_VERSION_INT,
4597 };
4598
4599 const AVCodec ff_h263_encoder = {
4600     .name           = "h263",
4601     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4602     .type           = AVMEDIA_TYPE_VIDEO,
4603     .id             = AV_CODEC_ID_H263,
4604     .priv_data_size = sizeof(MpegEncContext),
4605     .init           = ff_mpv_encode_init,
4606     .encode2        = ff_mpv_encode_picture,
4607     .close          = ff_mpv_encode_end,
4608     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4609     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4610     .priv_class     = &h263_class,
4611 };
4612
4613 static const AVOption h263p_options[] = {
4614     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus),       AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4615     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4616     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4617     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE},
4618     FF_MPV_COMMON_OPTS
4619 #if FF_API_MPEGVIDEO_OPTS
4620     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
4621     FF_MPV_DEPRECATED_A53_CC_OPT
4622     FF_MPV_DEPRECATED_MATRIX_OPT
4623     FF_MPV_DEPRECATED_BFRAME_OPTS
4624 #endif
4625     { NULL },
4626 };
4627 static const AVClass h263p_class = {
4628     .class_name = "H.263p encoder",
4629     .item_name  = av_default_item_name,
4630     .option     = h263p_options,
4631     .version    = LIBAVUTIL_VERSION_INT,
4632 };
4633
4634 const AVCodec ff_h263p_encoder = {
4635     .name           = "h263p",
4636     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4637     .type           = AVMEDIA_TYPE_VIDEO,
4638     .id             = AV_CODEC_ID_H263P,
4639     .priv_data_size = sizeof(MpegEncContext),
4640     .init           = ff_mpv_encode_init,
4641     .encode2        = ff_mpv_encode_picture,
4642     .close          = ff_mpv_encode_end,
4643     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4644     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4645     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4646     .priv_class     = &h263p_class,
4647 };
4648
4649 static const AVClass msmpeg4v2_class = {
4650     .class_name = "msmpeg4v2 encoder",
4651     .item_name  = av_default_item_name,
4652     .option     = ff_mpv_generic_options,
4653     .version    = LIBAVUTIL_VERSION_INT,
4654 };
4655
4656 const AVCodec ff_msmpeg4v2_encoder = {
4657     .name           = "msmpeg4v2",
4658     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4659     .type           = AVMEDIA_TYPE_VIDEO,
4660     .id             = AV_CODEC_ID_MSMPEG4V2,
4661     .priv_data_size = sizeof(MpegEncContext),
4662     .init           = ff_mpv_encode_init,
4663     .encode2        = ff_mpv_encode_picture,
4664     .close          = ff_mpv_encode_end,
4665     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4666     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4667     .priv_class     = &msmpeg4v2_class,
4668 };
4669
4670 static const AVClass msmpeg4v3_class = {
4671     .class_name = "msmpeg4v3 encoder",
4672     .item_name  = av_default_item_name,
4673     .option     = ff_mpv_generic_options,
4674     .version    = LIBAVUTIL_VERSION_INT,
4675 };
4676
4677 const AVCodec ff_msmpeg4v3_encoder = {
4678     .name           = "msmpeg4",
4679     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4680     .type           = AVMEDIA_TYPE_VIDEO,
4681     .id             = AV_CODEC_ID_MSMPEG4V3,
4682     .priv_data_size = sizeof(MpegEncContext),
4683     .init           = ff_mpv_encode_init,
4684     .encode2        = ff_mpv_encode_picture,
4685     .close          = ff_mpv_encode_end,
4686     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4687     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4688     .priv_class     = &msmpeg4v3_class,
4689 };
4690
4691 static const AVClass wmv1_class = {
4692     .class_name = "wmv1 encoder",
4693     .item_name  = av_default_item_name,
4694     .option     = ff_mpv_generic_options,
4695     .version    = LIBAVUTIL_VERSION_INT,
4696 };
4697
4698 const AVCodec ff_wmv1_encoder = {
4699     .name           = "wmv1",
4700     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4701     .type           = AVMEDIA_TYPE_VIDEO,
4702     .id             = AV_CODEC_ID_WMV1,
4703     .priv_data_size = sizeof(MpegEncContext),
4704     .init           = ff_mpv_encode_init,
4705     .encode2        = ff_mpv_encode_picture,
4706     .close          = ff_mpv_encode_end,
4707     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4708     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4709     .priv_class     = &wmv1_class,
4710 };